Spaces:

Boltz79
/

Sentiment-Analysis

Sleeping

App Files Community

Boltz79 committed on Jan 31

Commit

ba147ac

verified ·

1 Parent(s): ddf32d8

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -41

app.py CHANGED Viewed

@@ -12,19 +12,15 @@ class EmotionRecognizer:
             model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
             device=0 if torch.cuda.is_available() else -1
         )
-        self.target_sr = 16000  # Target sample rate for the model
-        self.max_duration = 10  # Max audio duration in seconds
     def process_audio(self, audio_path):
         try:
-            # Load audio file using soundfile (works better in Hugging Face Spaces)
             audio, orig_sr = sf.read(audio_path)
-            # Convert stereo to mono if needed
             if len(audio.shape) > 1:
                 audio = np.mean(audio, axis=1)
-            # Resample if necessary
             if orig_sr != self.target_sr:
                 audio = librosa.resample(
                     y=audio.astype(np.float32),
@@ -34,64 +30,46 @@ class EmotionRecognizer:
             else:
                 audio = audio.astype(np.float32)
-            # Normalize audio
             audio = librosa.util.normalize(audio)
-            # Trim/pad audio to max duration
             max_samples = self.max_duration * self.target_sr
             if len(audio) > max_samples:
                 audio = audio[:max_samples]
             else:
                 audio = np.pad(audio, (0, max(0, max_samples - len(audio))))
-            # Run classification
             results = self.classifier(
                 {"array": audio, "sampling_rate": self.target_sr}
             )
-            # Format output
             labels = [res["label"] for res in results]
             scores = [res["score"] * 100 for res in results]
-            text_output = "\n".join([
-                f"{label}: {score:.2f}%"
-                for label, score in zip(labels, scores)
-            ])
-            plot_data = {
-                "labels": labels,
-                "values": scores
-            }
             return text_output, plot_data
         except Exception as e:
-            error_msg = f"Error processing audio: {str(e)}"
-            print(error_msg)
-            return error_msg, None
 def create_interface():
     recognizer = EmotionRecognizer()
     with gr.Blocks(title="Audio Emotion Recognition") as interface:
         gr.Markdown("# 🎙️ Audio Emotion Recognition")
-        gr.Markdown("Record or upload audio (English speech, 3-10 seconds)")
         with gr.Row():
             with gr.Column():
                 audio_input = gr.Audio(
                     sources=["microphone", "upload"],
                     type="filepath",
-                    label="Input Audio",
-                    waveform_options={"waveform_progress_color": "#FF0066"}
                 )
                 submit_btn = gr.Button("Analyze", variant="primary")
             with gr.Column():
-                text_output = gr.Textbox(
-                    label="Emotion Analysis Results",
-                    interactive=False
-                )
                 plot_output = gr.BarPlot(
                     label="Confidence Scores",
                     x="labels",
@@ -105,17 +83,6 @@ def create_interface():
             inputs=audio_input,
             outputs=[text_output, plot_output]
         )
-        gr.Examples(
-            examples=[
-                "https://huggingface.co/spaces/echalabres/emotion-recognition/raw/main/example_angry.wav",
-                "https://huggingface.co/spaces/echalabres/emotion-recognition/raw/main/example_happy.wav"
-            ],
-            inputs=audio_input,
-            outputs=[text_output, plot_output],
-            fn=recognizer.process_audio,
-            cache_examples=True
-        )
     return interface

             model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
             device=0 if torch.cuda.is_available() else -1
         )
+        self.target_sr = 16000
+        self.max_duration = 10
     def process_audio(self, audio_path):
         try:
             audio, orig_sr = sf.read(audio_path)
             if len(audio.shape) > 1:
                 audio = np.mean(audio, axis=1)
             if orig_sr != self.target_sr:
                 audio = librosa.resample(
                     y=audio.astype(np.float32),
             else:
                 audio = audio.astype(np.float32)
             audio = librosa.util.normalize(audio)
             max_samples = self.max_duration * self.target_sr
             if len(audio) > max_samples:
                 audio = audio[:max_samples]
             else:
                 audio = np.pad(audio, (0, max(0, max_samples - len(audio))))
             results = self.classifier(
                 {"array": audio, "sampling_rate": self.target_sr}
             )
             labels = [res["label"] for res in results]
             scores = [res["score"] * 100 for res in results]
+            text_output = "\n".join([f"{label}: {score:.2f}%" for label, score in zip(labels, scores)])
+            plot_data = {"labels": labels, "values": scores}
             return text_output, plot_data
         except Exception as e:
+            return f"Error processing audio: {str(e)}", None
 def create_interface():
     recognizer = EmotionRecognizer()
     with gr.Blocks(title="Audio Emotion Recognition") as interface:
         gr.Markdown("# 🎙️ Audio Emotion Recognition")
+        gr.Markdown("Record or upload English speech (3-10 seconds)")
         with gr.Row():
             with gr.Column():
                 audio_input = gr.Audio(
                     sources=["microphone", "upload"],
                     type="filepath",
+                    label="Input Audio"
                 )
                 submit_btn = gr.Button("Analyze", variant="primary")
             with gr.Column():
+                text_output = gr.Textbox(label="Results", interactive=False)
                 plot_output = gr.BarPlot(
                     label="Confidence Scores",
                     x="labels",
             inputs=audio_input,
             outputs=[text_output, plot_output]
         )
     return interface