raffaelsiregar committed on
Commit e2950b2 · verified · 1 Parent(s): 0cab308

solved several bugs

Files changed (1)
  1. app.py +14 -27
app.py CHANGED
@@ -69,7 +69,7 @@ class CNN1DLSTMAudioClassifier(nn.Module):
 num_class = 6
 model = CNN1DLSTMAudioClassifier(num_class)
 
-model.load_state_dict(torch.load("speech-emotion-recognition-best-model.bin", weights_only=False))
+model.load_state_dict(torch.load("speech-emotion-recognition/speech-emotion-recognition-best-model.bin", weights_only=False))
 model.eval()
 
 def preprocess_single_audio(file_path, sample_rate=16000, n_mels=128, n_fft=2048, hop_length=512):
@@ -135,6 +135,8 @@ def decode_emotion_prediction(prediction_tensor, label_encoder):
 
     return predicted_emotion, confidence
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
 
 def predict(wave):
     wave = preprocess_single_audio(wave)
@@ -146,29 +148,14 @@ def predict(wave):
     predicted_emotion, confidence = decode_emotion_prediction(prediction, le)
     return f"Predicted emotion: {predicted_emotion} (Confidence: {confidence:.2f})"
 
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Audio Prediction App")
-    gr.Markdown("Upload an audio file or record directly to get a prediction")
-
-    with gr.Row():
-        audio_input = gr.Audio(source="microphone", type="filepath")
-        audio_output = gr.Audio(label="Processed Audio")
-
-    with gr.Row():
-        submit_btn = gr.Button("Get Prediction", variant="primary")
-        clear_btn = gr.Button("Clear")
-
-    prediction_output = gr.Textbox(label="Prediction")
-
-    submit_btn.click(
-        fn=predict,
-        inputs=[audio_input, audio_input.source],
-        outputs=[audio_output, prediction_output]
-    )
-
-    clear_btn.click(
-        fn=lambda: (None, None, ""),
-        outputs=[audio_input, audio_output, prediction_output]
-    )
-
-demo.launch()
+# Gradio Interface
+iface = gr.Interface(
+    fn=predict,
+    inputs=gr.Audio(sources="microphone", type="filepath"),
+    outputs="text",
+    live=True,
+    title="Speech Emotion Recognition",
+    description="Record your voice and get the predicted emotion."
+)
+
+iface.launch()
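A note on the loading change: `torch.load(..., weights_only=False)` unpickles arbitrary objects, and a checkpoint saved on a GPU machine will fail to load on a CPU-only Space unless `map_location` is given. Below is a minimal sketch of a more defensive version of the same step; the path and model come from the diff above, and the assumption that the `.bin` file holds a plain state dict is mine:

```python
import torch

# Load the checkpoint onto whichever device is available; map_location
# lets a GPU-saved checkpoint deserialize on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
state_dict = torch.load(
    "speech-emotion-recognition/speech-emotion-recognition-best-model.bin",
    map_location=device,
    weights_only=False,  # as in the commit; True is safer if the file is a pure state dict
)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()
```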
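Since `model` now lives on `device`, the input tensor inside `predict` has to be moved there too, or the forward pass raises a device-mismatch error. The middle of `predict` is elided in this diff, so the body below is a hypothetical reconstruction: `unsqueeze(0)` and the `torch.no_grad()` context are assumptions, while `preprocess_single_audio`, `decode_emotion_prediction`, and `le` appear in the diff:

```python
def predict(wave):
    wave = preprocess_single_audio(wave)  # mel-spectrogram tensor, built on CPU
    wave = wave.unsqueeze(0).to(device)   # add a batch dim, match the model's device
    with torch.no_grad():                 # no gradients needed at inference time
        prediction = model(wave)
    predicted_emotion, confidence = decode_emotion_prediction(prediction, le)
    return f"Predicted emotion: {predicted_emotion} (Confidence: {confidence:.2f})"
```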
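One caveat on the new interface: Gradio 4.x documents `sources` as a list, e.g. `sources=["microphone"]`, while the singular `source` keyword used by the removed `gr.Blocks` code belongs to Gradio 3.x. If the Space pins Gradio 4, a sketch of the same interface with the documented list form:

```python
import gradio as gr

# Same interface as the commit, with `sources` as a list per the Gradio 4 docs.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    live=True,  # re-run predict whenever the recording changes
    title="Speech Emotion Recognition",
    description="Record your voice and get the predicted emotion.",
)

iface.launch()
```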