Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,10 +10,6 @@ import io
|
|
10 |
import matplotlib.pyplot as plt
|
11 |
import librosa.display
|
12 |
from PIL import Image # For image conversion
|
13 |
-
import sqlite3
|
14 |
-
import uuid
|
15 |
-
import shutil
|
16 |
-
from datetime import datetime
|
17 |
|
18 |
# Try to import noisereduce (if not available, noise reduction will be skipped)
|
19 |
try:
|
@@ -39,37 +35,6 @@ def add_emoji_to_label(label):
|
|
39 |
emoji = emotion_to_emoji.get(label.lower(), "")
|
40 |
return f"{label.capitalize()} {emoji}"
|
41 |
|
42 |
-
# Set up SQLite database connection and create table if it doesn't exist.
|
43 |
-
conn = sqlite3.connect("predictions.db", check_same_thread=False)
|
44 |
-
cursor = conn.cursor()
|
45 |
-
cursor.execute('''
|
46 |
-
CREATE TABLE IF NOT EXISTS predictions (
|
47 |
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
48 |
-
file_path TEXT NOT NULL,
|
49 |
-
predicted_emotion TEXT NOT NULL,
|
50 |
-
timestamp TEXT NOT NULL
|
51 |
-
)
|
52 |
-
''')
|
53 |
-
conn.commit()
|
54 |
-
|
55 |
-
def store_prediction(file_path, predicted_emotion):
|
56 |
-
"""Store the audio file path, predicted emotion, and current timestamp in the database."""
|
57 |
-
timestamp = datetime.now().isoformat()
|
58 |
-
cursor.execute("INSERT INTO predictions (file_path, predicted_emotion, timestamp) VALUES (?, ?, ?)",
|
59 |
-
(file_path, predicted_emotion, timestamp))
|
60 |
-
conn.commit()
|
61 |
-
|
62 |
-
def save_uploaded_audio(audio_file_path):
|
63 |
-
"""Copy the uploaded audio file to a permanent 'uploads' directory with a unique filename."""
|
64 |
-
uploads_dir = "uploads"
|
65 |
-
if not os.path.exists(uploads_dir):
|
66 |
-
os.makedirs(uploads_dir)
|
67 |
-
file_extension = os.path.splitext(audio_file_path)[1]
|
68 |
-
new_filename = f"{uuid.uuid4()}{file_extension}"
|
69 |
-
destination = os.path.join(uploads_dir, new_filename)
|
70 |
-
shutil.copy(audio_file_path, destination)
|
71 |
-
return destination
|
72 |
-
|
73 |
# Load the pre-trained SpeechBrain classifier
|
74 |
classifier = foreign_class(
|
75 |
source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
|
@@ -145,7 +110,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
|
|
145 |
result = classifier.classify_file(temp_file)
|
146 |
os.remove(temp_file)
|
147 |
if isinstance(result, tuple) and len(result) > 3:
|
148 |
-
label = result[3][0] # Extract predicted emotion label
|
149 |
else:
|
150 |
label = str(result)
|
151 |
return add_emoji_to_label(label.lower())
|
@@ -169,14 +134,10 @@ def plot_waveform(audio_file):
|
|
169 |
def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
|
170 |
"""
|
171 |
Run emotion prediction and generate a waveform plot.
|
172 |
-
Then, save the uploaded audio file and store its metadata in the database.
|
173 |
Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
|
174 |
"""
|
175 |
emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
|
176 |
waveform = plot_waveform(audio_file)
|
177 |
-
# Save the uploaded audio file permanently and store the prediction in the database.
|
178 |
-
stored_file_path = save_uploaded_audio(audio_file)
|
179 |
-
store_prediction(stored_file_path, emotion)
|
180 |
return emotion, waveform
|
181 |
|
182 |
with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
|
@@ -184,7 +145,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
|
|
184 |
gr.Markdown(
|
185 |
"Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
|
186 |
"The prediction is accompanied by an emoji in the output, and you can also view the audio's waveform. "
|
187 |
-
"
|
188 |
)
|
189 |
|
190 |
with gr.Tabs():
|
@@ -216,7 +177,6 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
|
|
216 |
- Optional Noise Reduction.
|
217 |
- Visualization of the audio waveform.
|
218 |
- Emoji representation of the predicted emotion in the output.
|
219 |
-
- Local storage of audio files and metadata (predicted emotion, timestamp).
|
220 |
|
221 |
**Credits:**
|
222 |
- [SpeechBrain](https://speechbrain.github.io)
|
|
|
10 |
import matplotlib.pyplot as plt
|
11 |
import librosa.display
|
12 |
from PIL import Image # For image conversion
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Try to import noisereduce (if not available, noise reduction will be skipped)
|
15 |
try:
|
|
|
35 |
emoji = emotion_to_emoji.get(label.lower(), "")
|
36 |
return f"{label.capitalize()} {emoji}"
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# Load the pre-trained SpeechBrain classifier
|
39 |
classifier = foreign_class(
|
40 |
source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
|
|
|
110 |
result = classifier.classify_file(temp_file)
|
111 |
os.remove(temp_file)
|
112 |
if isinstance(result, tuple) and len(result) > 3:
|
113 |
+
label = result[3][0] # Extract predicted emotion label from the tuple
|
114 |
else:
|
115 |
label = str(result)
|
116 |
return add_emoji_to_label(label.lower())
|
|
|
134 |
def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
|
135 |
"""
|
136 |
Run emotion prediction and generate a waveform plot.
|
|
|
137 |
Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
|
138 |
"""
|
139 |
emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
|
140 |
waveform = plot_waveform(audio_file)
|
|
|
|
|
|
|
141 |
return emotion, waveform
|
142 |
|
143 |
with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
|
|
|
145 |
gr.Markdown(
|
146 |
"Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
|
147 |
"The prediction is accompanied by an emoji in the output, and you can also view the audio's waveform. "
|
148 |
+
"Use the options below to adjust ensemble prediction and noise reduction settings."
|
149 |
)
|
150 |
|
151 |
with gr.Tabs():
|
|
|
177 |
- Optional Noise Reduction.
|
178 |
- Visualization of the audio waveform.
|
179 |
- Emoji representation of the predicted emotion in the output.
|
|
|
180 |
|
181 |
**Credits:**
|
182 |
- [SpeechBrain](https://speechbrain.github.io)
|