Spaces:

raffaelsiregar
/

speech-emotion-recognition

Sleeping

App Files Community

raffaelsiregar committed on Oct 10, 2024

Commit

e2950b2

verified ·

1 Parent(s): 0cab308

solved several bugs

Browse files

Files changed (1) hide show

app.py +14 -27

app.py CHANGED Viewed

@@ -69,7 +69,7 @@ class CNN1DLSTMAudioClassifier(nn.Module):
 num_class = 6
 model = CNN1DLSTMAudioClassifier(num_class)
-model.load_state_dict(torch.load("speech-emotion-recognition-best-model.bin", weights_only=False))
 model.eval()
 def preprocess_single_audio(file_path, sample_rate=16000, n_mels=128, n_fft=2048, hop_length=512):
@@ -135,6 +135,8 @@ def decode_emotion_prediction(prediction_tensor, label_encoder):
     return predicted_emotion, confidence
 def predict(wave):
     wave = preprocess_single_audio(wave)
@@ -146,29 +148,14 @@ def predict(wave):
     predicted_emotion, confidence = decode_emotion_prediction(prediction, le)
     return f"Predicted emotion: {predicted_emotion} (Confidence: {confidence:.2f})"
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Audio Prediction App")
-    gr.Markdown("Upload an audio file or record directly to get a prediction")
-    with gr.Row():
-        audio_input = gr.Audio(source="microphone", type="filepath")
-        audio_output = gr.Audio(label="Processed Audio")
-    with gr.Row():
-        submit_btn = gr.Button("Get Prediction", variant="primary")
-        clear_btn = gr.Button("Clear")
-    prediction_output = gr.Textbox(label="Prediction")
-    submit_btn.click(
-        fn=predict,
-        inputs=[audio_input, audio_input.source],
-        outputs=[audio_output, prediction_output]
-    )
-    clear_btn.click(
-        fn=lambda: (None, None, ""),
-        outputs=[audio_input, audio_output, prediction_output]
-    )
-demo.launch()

 num_class = 6
 model = CNN1DLSTMAudioClassifier(num_class)
+model.load_state_dict(torch.load("speech-emotion-recognition/speech-emotion-recognition-best-model.bin", weights_only=False))
 model.eval()
 def preprocess_single_audio(file_path, sample_rate=16000, n_mels=128, n_fft=2048, hop_length=512):
     return predicted_emotion, confidence
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
 def predict(wave):
     wave = preprocess_single_audio(wave)
     predicted_emotion, confidence = decode_emotion_prediction(prediction, le)
     return f"Predicted emotion: {predicted_emotion} (Confidence: {confidence:.2f})"
+# Gradio Interface
+iface = gr.Interface(
+    fn=predict,
+    inputs=gr.Audio(sources="microphone", type="filepath"),
+    outputs="text",
+    live=True,
+    title="Speech Emotion Recognition",
+    description="Record your voice and get the predicted emotion."
+)
+iface.launch()