Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,10 @@ import io
|
|
10 |
import matplotlib.pyplot as plt
|
11 |
import librosa.display
|
12 |
from PIL import Image # For image conversion
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Try to import noisereduce (if not available, noise reduction will be skipped)
|
15 |
try:
|
@@ -35,6 +39,37 @@ def add_emoji_to_label(label):
|
|
35 |
emoji = emotion_to_emoji.get(label.lower(), "")
|
36 |
return f"{label.capitalize()} {emoji}"
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# Load the pre-trained SpeechBrain classifier
|
39 |
classifier = foreign_class(
|
40 |
source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
|
@@ -110,7 +145,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
|
|
110 |
result = classifier.classify_file(temp_file)
|
111 |
os.remove(temp_file)
|
112 |
if isinstance(result, tuple) and len(result) > 3:
|
113 |
-
label = result[3][0] # Extract predicted emotion label
|
114 |
else:
|
115 |
label = str(result)
|
116 |
return add_emoji_to_label(label.lower())
|
@@ -134,10 +169,14 @@ def plot_waveform(audio_file):
|
|
134 |
def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
|
135 |
"""
|
136 |
Run emotion prediction and generate a waveform plot.
|
|
|
137 |
Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
|
138 |
"""
|
139 |
emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
|
140 |
waveform = plot_waveform(audio_file)
|
|
|
|
|
|
|
141 |
return emotion, waveform
|
142 |
|
143 |
with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
|
@@ -145,7 +184,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
|
|
145 |
gr.Markdown(
|
146 |
"Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
|
147 |
"The prediction is accompanied by an emoji in the output, and you can also view the audio's waveform. "
|
148 |
-
"
|
149 |
)
|
150 |
|
151 |
with gr.Tabs():
|
@@ -177,6 +216,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
|
|
177 |
- Optional Noise Reduction.
|
178 |
- Visualization of the audio waveform.
|
179 |
- Emoji representation of the predicted emotion in the output.
|
|
|
180 |
|
181 |
**Credits:**
|
182 |
- [SpeechBrain](https://speechbrain.github.io)
|
|
|
10 |
import matplotlib.pyplot as plt
|
11 |
import librosa.display
|
12 |
from PIL import Image # For image conversion
|
13 |
+
import sqlite3
|
14 |
+
import uuid
|
15 |
+
import shutil
|
16 |
+
from datetime import datetime
|
17 |
|
18 |
# Try to import noisereduce (if not available, noise reduction will be skipped)
|
19 |
try:
|
|
|
39 |
emoji = emotion_to_emoji.get(label.lower(), "")
|
40 |
return f"{label.capitalize()} {emoji}"
|
41 |
|
42 |
+
# Set up SQLite database connection and create table if it doesn't exist.
# check_same_thread=False allows this connection to be touched from Gradio's
# worker threads. NOTE(review): sqlite3 connection/cursor objects are still
# not thread-safe on their own — concurrent writes should be serialized;
# confirm how Gradio dispatches handlers before relying on this.
conn = sqlite3.connect("predictions.db", check_same_thread=False)
cursor = conn.cursor()
# Schema: one row per prediction — where the audio was stored, which emotion
# was predicted, and when (ISO-8601 text timestamp).
cursor.execute('''
    CREATE TABLE IF NOT EXISTS predictions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_path TEXT NOT NULL,
        predicted_emotion TEXT NOT NULL,
        timestamp TEXT NOT NULL
    )
''')
conn.commit()
|
54 |
+
|
55 |
+
def store_prediction(file_path, predicted_emotion):
    """Store the audio file path, predicted emotion, and current timestamp in the database.

    Opens a short-lived connection per call instead of writing through the
    shared module-level cursor: sqlite3 connections/cursors are not
    thread-safe, and Gradio may invoke handlers from multiple worker threads.

    Args:
        file_path: Path of the permanently stored audio file.
        predicted_emotion: Emotion label (with emoji) to record.
    """
    timestamp = datetime.now().isoformat()
    local_conn = sqlite3.connect("predictions.db")
    try:
        # `with local_conn` commits on success and rolls back on error.
        with local_conn:
            # Defensive: ensure the table exists even if module-level setup
            # did not run (e.g. the DB file was deleted while the app ran).
            local_conn.execute('''
                CREATE TABLE IF NOT EXISTS predictions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    file_path TEXT NOT NULL,
                    predicted_emotion TEXT NOT NULL,
                    timestamp TEXT NOT NULL
                )
            ''')
            local_conn.execute(
                "INSERT INTO predictions (file_path, predicted_emotion, timestamp) VALUES (?, ?, ?)",
                (file_path, predicted_emotion, timestamp),
            )
    finally:
        local_conn.close()
|
61 |
+
|
62 |
+
def save_uploaded_audio(audio_file_path):
    """Copy the uploaded audio file to a permanent 'uploads' directory with a unique filename.

    Args:
        audio_file_path: Path to the (temporary) uploaded audio file.

    Returns:
        Path of the copy inside the 'uploads' directory; the original file
        extension is preserved and the basename is a fresh UUID, so repeated
        uploads never collide.
    """
    uploads_dir = "uploads"
    # exist_ok=True avoids the check-then-create race of the
    # os.path.exists() + os.makedirs() pattern.
    os.makedirs(uploads_dir, exist_ok=True)
    file_extension = os.path.splitext(audio_file_path)[1]
    new_filename = f"{uuid.uuid4()}{file_extension}"
    destination = os.path.join(uploads_dir, new_filename)
    shutil.copy(audio_file_path, destination)
    return destination
|
72 |
+
|
73 |
# Load the pre-trained SpeechBrain classifier
|
74 |
classifier = foreign_class(
|
75 |
source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
|
|
|
145 |
result = classifier.classify_file(temp_file)
|
146 |
os.remove(temp_file)
|
147 |
if isinstance(result, tuple) and len(result) > 3:
|
148 |
+
label = result[3][0] # Extract predicted emotion label
|
149 |
else:
|
150 |
label = str(result)
|
151 |
return add_emoji_to_label(label.lower())
|
|
|
169 |
def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
    """
    Predict the emotion for *audio_file* and render its waveform.

    The uploaded audio is also copied into permanent storage and the
    prediction metadata is recorded in the local database.
    Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
    """
    predicted = predict_emotion(
        audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap
    )
    plot_image = plot_waveform(audio_file)
    # Persist the audio file and its prediction before handing results to the UI.
    store_prediction(save_uploaded_audio(audio_file), predicted)
    return predicted, plot_image
|
181 |
|
182 |
with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
|
|
|
184 |
gr.Markdown(
|
185 |
"Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
|
186 |
"The prediction is accompanied by an emoji in the output, and you can also view the audio's waveform. "
|
187 |
+
"Your audio file and predicted emotion will be stored locally."
|
188 |
)
|
189 |
|
190 |
with gr.Tabs():
|
|
|
216 |
- Optional Noise Reduction.
|
217 |
- Visualization of the audio waveform.
|
218 |
- Emoji representation of the predicted emotion in the output.
|
219 |
+
- Local storage of audio files and metadata (predicted emotion, timestamp).
|
220 |
|
221 |
**Credits:**
|
222 |
- [SpeechBrain](https://speechbrain.github.io)
|