Spaces:

Boltz79
/

Sentiment-Analysis

Sleeping

App Files Community

Boltz79 committed on Feb 8

Commit

cc50c45

verified ·

1 Parent(s): 4667629

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -30

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# app.py
 import gradio as gr
 import librosa
 import numpy as np
@@ -44,13 +43,7 @@ classifier = foreign_class(
 )
 def preprocess_audio(audio_file, apply_noise_reduction=False):
-    """
-    Load and preprocess the audio file:
-      - Convert to 16kHz mono.
-      - Optionally apply noise reduction.
-      - Normalize the audio.
-    Saves the processed audio to a temporary file and returns its path.
-    """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     if apply_noise_reduction and NOISEREDUCE_AVAILABLE:
         y = nr.reduce_noise(y=y, sr=sr)
@@ -62,19 +55,15 @@ def preprocess_audio(audio_file, apply_noise_reduction=False):
     return temp_file.name
 def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
-    """
-    For longer audio files, split into overlapping segments, predict each segment,
-    and return the majority-voted emotion label.
-    """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     total_duration = librosa.get_duration(y=y, sr=sr)
-    # If the audio is short, process it directly
     if total_duration <= segment_duration:
         temp_file = preprocess_audio(audio_file, apply_noise_reduction)
         _, _, _, label = classifier.classify_file(temp_file)
         os.remove(temp_file)
-        return label
     step = segment_duration - overlap
     segments = []
@@ -91,7 +80,7 @@ def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duratio
     for seg in segments:
         temp_file = preprocess_audio(seg, apply_noise_reduction)
         _, _, _, label = classifier.classify_file(temp_file)
-        predictions.append(label)
         os.remove(temp_file)
         os.remove(seg)
@@ -100,6 +89,7 @@ def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duratio
     return most_common
 def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
     try:
         if use_ensemble:
             label = ensemble_prediction(audio_file, apply_noise_reduction, segment_duration, overlap)
@@ -109,19 +99,16 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
             os.remove(temp_file)
             if isinstance(result, tuple) and len(result) > 3:
-                label = result[3][0]  # Extract the predicted label (e.g., 'hap')
             else:
                 label = str(result)  # Convert to string if unexpected format
-        return add_emoji_to_label(label.lower())  # Ensure lowercase for consistency
     except Exception as e:
         return f"Error processing file: {str(e)}"
 def plot_waveform(audio_file):
-    """
-    Generate and return a waveform plot image (as a PIL Image) for the given audio file.
-    """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     plt.figure(figsize=(10, 3))
     librosa.display.waveshow(y, sr=sr)
@@ -130,15 +117,10 @@ def plot_waveform(audio_file):
     plt.savefig(buf, format="png")
     plt.close()
     buf.seek(0)
-    # Convert buffer to PIL Image
-    image = Image.open(buf)
-    return image
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
-    """
-    Run emotion prediction and generate a waveform plot.
-    Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
-    """
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
     return emotion, waveform
@@ -147,7 +129,7 @@ def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_du
 with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
     gr.Markdown("<h1 style='text-align: center;'>Enhanced Emotion Recognition 😊</h1>")
     gr.Markdown(
-        "Upload an audio file and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
         "The prediction is accompanied by an emoji, and you can also view the audio's waveform. "
         "Use the options below to adjust ensemble prediction and noise reduction settings."
     )
@@ -163,7 +145,6 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
                 overlap = gr.Slider(minimum=0.0, maximum=5.0, step=0.5, value=1.0, label="Segment Overlap (s)")
             predict_button = gr.Button("Predict Emotion")
             result_text = gr.Textbox(label="Predicted Emotion")
-            # Set type to "pil" since we are returning a PIL Image
             waveform_image = gr.Image(label="Audio Waveform", type="pil")
             predict_button.click(

 import gradio as gr
 import librosa
 import numpy as np
 )
 def preprocess_audio(audio_file, apply_noise_reduction=False):
+    """Load and preprocess the audio file: convert to 16kHz mono, optionally apply noise reduction, and normalize."""
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     if apply_noise_reduction and NOISEREDUCE_AVAILABLE:
         y = nr.reduce_noise(y=y, sr=sr)
     return temp_file.name
 def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
+    """Split longer audio files into overlapping segments, predict each segment, and return the majority-voted emotion label."""
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     total_duration = librosa.get_duration(y=y, sr=sr)
     if total_duration <= segment_duration:
         temp_file = preprocess_audio(audio_file, apply_noise_reduction)
         _, _, _, label = classifier.classify_file(temp_file)
         os.remove(temp_file)
+        return label[0]
     step = segment_duration - overlap
     segments = []
     for seg in segments:
         temp_file = preprocess_audio(seg, apply_noise_reduction)
         _, _, _, label = classifier.classify_file(temp_file)
+        predictions.append(label[0])  # Extract the predicted emotion
         os.remove(temp_file)
         os.remove(seg)
     return most_common
 def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
+    """Predict emotion from an audio file and return the emotion with an emoji."""
     try:
         if use_ensemble:
             label = ensemble_prediction(audio_file, apply_noise_reduction, segment_duration, overlap)
             os.remove(temp_file)
             if isinstance(result, tuple) and len(result) > 3:
+                label = result[3][0]  # Extract the predicted emotion label
             else:
                 label = str(result)  # Convert to string if unexpected format
+        return add_emoji_to_label(label.lower())  # Format and add an emoji
     except Exception as e:
         return f"Error processing file: {str(e)}"
 def plot_waveform(audio_file):
+    """Generate and return a waveform plot image (as a PIL Image) for the given audio file."""
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     plt.figure(figsize=(10, 3))
     librosa.display.waveshow(y, sr=sr)
     plt.savefig(buf, format="png")
     plt.close()
     buf.seek(0)
+    return Image.open(buf)
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
+    """Run emotion prediction and generate a waveform plot."""
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
     return emotion, waveform
 with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
     gr.Markdown("<h1 style='text-align: center;'>Enhanced Emotion Recognition 😊</h1>")
     gr.Markdown(
+        "Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
         "The prediction is accompanied by an emoji, and you can also view the audio's waveform. "
         "Use the options below to adjust ensemble prediction and noise reduction settings."
     )
                 overlap = gr.Slider(minimum=0.0, maximum=5.0, step=0.5, value=1.0, label="Segment Overlap (s)")
             predict_button = gr.Button("Predict Emotion")
             result_text = gr.Textbox(label="Predicted Emotion")
             waveform_image = gr.Image(label="Audio Waveform", type="pil")
             predict_button.click(