Boltz79 committed on
Commit 16851ca · verified · 1 Parent(s): cd578af

Update app.py

Files changed (1): app.py +42 -2
app.py CHANGED
@@ -10,6 +10,10 @@ import io
 import matplotlib.pyplot as plt
 import librosa.display
 from PIL import Image # For image conversion
+import sqlite3
+import uuid
+import shutil
+from datetime import datetime
 
 # Try to import noisereduce (if not available, noise reduction will be skipped)
 try:
@@ -35,6 +39,37 @@ def add_emoji_to_label(label):
     emoji = emotion_to_emoji.get(label.lower(), "")
     return f"{label.capitalize()} {emoji}"
 
+# Set up SQLite database connection and create table if it doesn't exist.
+conn = sqlite3.connect("predictions.db", check_same_thread=False)
+cursor = conn.cursor()
+cursor.execute('''
+    CREATE TABLE IF NOT EXISTS predictions (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        file_path TEXT NOT NULL,
+        predicted_emotion TEXT NOT NULL,
+        timestamp TEXT NOT NULL
+    )
+''')
+conn.commit()
+
+def store_prediction(file_path, predicted_emotion):
+    """Store the audio file path, predicted emotion, and current timestamp in the database."""
+    timestamp = datetime.now().isoformat()
+    cursor.execute("INSERT INTO predictions (file_path, predicted_emotion, timestamp) VALUES (?, ?, ?)",
+                   (file_path, predicted_emotion, timestamp))
+    conn.commit()
+
+def save_uploaded_audio(audio_file_path):
+    """Copy the uploaded audio file to a permanent 'uploads' directory with a unique filename."""
+    uploads_dir = "uploads"
+    if not os.path.exists(uploads_dir):
+        os.makedirs(uploads_dir)
+    file_extension = os.path.splitext(audio_file_path)[1]
+    new_filename = f"{uuid.uuid4()}{file_extension}"
+    destination = os.path.join(uploads_dir, new_filename)
+    shutil.copy(audio_file_path, destination)
+    return destination
+
 # Load the pre-trained SpeechBrain classifier
 classifier = foreign_class(
     source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
@@ -110,7 +145,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
         result = classifier.classify_file(temp_file)
         os.remove(temp_file)
         if isinstance(result, tuple) and len(result) > 3:
-            label = result[3][0] # Extract predicted emotion label from the tuple
+            label = result[3][0] # Extract predicted emotion label
         else:
             label = str(result)
         return add_emoji_to_label(label.lower())
@@ -134,10 +169,14 @@ def plot_waveform(audio_file):
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
     """
     Run emotion prediction and generate a waveform plot.
+    Then, save the uploaded audio file and store its metadata in the database.
     Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
     """
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
+    # Save the uploaded audio file permanently and store the prediction in the database.
+    stored_file_path = save_uploaded_audio(audio_file)
+    store_prediction(stored_file_path, emotion)
     return emotion, waveform
 
 with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
@@ -145,7 +184,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
     gr.Markdown(
         "Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
         "The prediction is accompanied by an emoji in the output, and you can also view the audio's waveform. "
-        "Use the options below to adjust ensemble prediction and noise reduction settings."
+        "Your audio file and predicted emotion will be stored locally."
     )
 
     with gr.Tabs():
@@ -177,6 +216,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
             - Optional Noise Reduction.
            - Visualization of the audio waveform.
            - Emoji representation of the predicted emotion in the output.
+           - Local storage of audio files and metadata (predicted emotion, timestamp).
 
            **Credits:**
            - [SpeechBrain](https://speechbrain.github.io)
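
For reference, here is a minimal inspection sketch (not part of app.py) showing how the rows written by store_prediction could be queried after a few predictions. It assumes the predictions.db file and the predictions table schema created in this commit, with the database in the current working directory.

import sqlite3

# Open the database created by app.py.
conn = sqlite3.connect("predictions.db")
cursor = conn.cursor()

# List the ten most recent predictions, newest first
# (ISO-8601 timestamps from datetime.isoformat() sort lexically).
cursor.execute(
    "SELECT file_path, predicted_emotion, timestamp "
    "FROM predictions ORDER BY timestamp DESC LIMIT 10"
)
for file_path, predicted_emotion, timestamp in cursor.fetchall():
    print(f"{timestamp}  {predicted_emotion}  {file_path}")

conn.close()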