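Fix several bugs: correct the model checkpoint path, move the model to the available device (CUDA or CPU), and replace the Blocks-based UI wiring with a gr.Interface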
Browse files
app.py
CHANGED
@@ -69,7 +69,7 @@ class CNN1DLSTMAudioClassifier(nn.Module):
|
|
69 |
num_class = 6
|
70 |
model = CNN1DLSTMAudioClassifier(num_class)
|
71 |
|
72 |
-
model.load_state_dict(torch.load("speech-emotion-recognition-best-model.bin", weights_only=False))
|
73 |
model.eval()
|
74 |
|
75 |
def preprocess_single_audio(file_path, sample_rate=16000, n_mels=128, n_fft=2048, hop_length=512):
|
@@ -135,6 +135,8 @@ def decode_emotion_prediction(prediction_tensor, label_encoder):
|
|
135 |
|
136 |
return predicted_emotion, confidence
|
137 |
|
|
|
|
|
138 |
|
139 |
def predict(wave):
|
140 |
wave = preprocess_single_audio(wave)
|
@@ -146,29 +148,14 @@ def predict(wave):
|
|
146 |
predicted_emotion, confidence = decode_emotion_prediction(prediction, le)
|
147 |
return f"Predicted emotion: {predicted_emotion} (Confidence: {confidence:.2f})"
|
148 |
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
prediction_output = gr.Textbox(label="Prediction")
|
162 |
-
|
163 |
-
submit_btn.click(
|
164 |
-
fn=predict,
|
165 |
-
inputs=[audio_input, audio_input.source],
|
166 |
-
outputs=[audio_output, prediction_output]
|
167 |
-
)
|
168 |
-
|
169 |
-
clear_btn.click(
|
170 |
-
fn=lambda: (None, None, ""),
|
171 |
-
outputs=[audio_input, audio_output, prediction_output]
|
172 |
-
)
|
173 |
-
|
174 |
-
demo.launch()
|
|
|
69 |
num_class = 6
|
70 |
model = CNN1DLSTMAudioClassifier(num_class)
|
71 |
|
72 |
+
model.load_state_dict(torch.load("speech-emotion-recognition/speech-emotion-recognition-best-model.bin", weights_only=False))
|
73 |
model.eval()
|
74 |
|
75 |
def preprocess_single_audio(file_path, sample_rate=16000, n_mels=128, n_fft=2048, hop_length=512):
|
|
|
135 |
|
136 |
return predicted_emotion, confidence
|
137 |
|
138 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
139 |
+
model = model.to(device)
|
140 |
|
141 |
def predict(wave):
|
142 |
wave = preprocess_single_audio(wave)
|
|
|
148 |
predicted_emotion, confidence = decode_emotion_prediction(prediction, le)
|
149 |
return f"Predicted emotion: {predicted_emotion} (Confidence: {confidence:.2f})"
|
150 |
|
151 |
+
# Gradio Interface
|
152 |
+
iface = gr.Interface(
|
153 |
+
fn=predict,
|
154 |
+
inputs=gr.Audio(sources="microphone", type="filepath"),
|
155 |
+
outputs="text",
|
156 |
+
live=True,
|
157 |
+
title="Speech Emotion Recognition",
|
158 |
+
description="Record your voice and get the predicted emotion."
|
159 |
+
)
|
160 |
+
|
161 |
+
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|