Boltz79 committed on
Commit
fd8d90c
·
verified ·
1 Parent(s): 590dc40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -42
app.py CHANGED
@@ -10,10 +10,6 @@ import io
10
  import matplotlib.pyplot as plt
11
  import librosa.display
12
  from PIL import Image # For image conversion
13
- import sqlite3
14
- import uuid
15
- import shutil
16
- from datetime import datetime
17
 
18
  # Try to import noisereduce (if not available, noise reduction will be skipped)
19
  try:
@@ -39,37 +35,6 @@ def add_emoji_to_label(label):
39
  emoji = emotion_to_emoji.get(label.lower(), "")
40
  return f"{label.capitalize()} {emoji}"
41
 
42
- # Set up SQLite database connection and create table if it doesn't exist.
43
- conn = sqlite3.connect("predictions.db", check_same_thread=False)
44
- cursor = conn.cursor()
45
- cursor.execute('''
46
- CREATE TABLE IF NOT EXISTS predictions (
47
- id INTEGER PRIMARY KEY AUTOINCREMENT,
48
- file_path TEXT NOT NULL,
49
- predicted_emotion TEXT NOT NULL,
50
- timestamp TEXT NOT NULL
51
- )
52
- ''')
53
- conn.commit()
54
-
55
- def store_prediction(file_path, predicted_emotion):
56
- """Store the audio file path, predicted emotion, and current timestamp in the database."""
57
- timestamp = datetime.now().isoformat()
58
- cursor.execute("INSERT INTO predictions (file_path, predicted_emotion, timestamp) VALUES (?, ?, ?)",
59
- (file_path, predicted_emotion, timestamp))
60
- conn.commit()
61
-
62
- def save_uploaded_audio(audio_file_path):
63
- """Copy the uploaded audio file to a permanent 'uploads' directory with a unique filename."""
64
- uploads_dir = "uploads"
65
- if not os.path.exists(uploads_dir):
66
- os.makedirs(uploads_dir)
67
- file_extension = os.path.splitext(audio_file_path)[1]
68
- new_filename = f"{uuid.uuid4()}{file_extension}"
69
- destination = os.path.join(uploads_dir, new_filename)
70
- shutil.copy(audio_file_path, destination)
71
- return destination
72
-
73
  # Load the pre-trained SpeechBrain classifier
74
  classifier = foreign_class(
75
  source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
@@ -145,7 +110,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
145
  result = classifier.classify_file(temp_file)
146
  os.remove(temp_file)
147
  if isinstance(result, tuple) and len(result) > 3:
148
- label = result[3][0] # Extract predicted emotion label
149
  else:
150
  label = str(result)
151
  return add_emoji_to_label(label.lower())
@@ -169,14 +134,10 @@ def plot_waveform(audio_file):
169
  def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
170
  """
171
  Run emotion prediction and generate a waveform plot.
172
- Then, save the uploaded audio file and store its metadata in the database.
173
  Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
174
  """
175
  emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
176
  waveform = plot_waveform(audio_file)
177
- # Save the uploaded audio file permanently and store the prediction in the database.
178
- stored_file_path = save_uploaded_audio(audio_file)
179
- store_prediction(stored_file_path, emotion)
180
  return emotion, waveform
181
 
182
  with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
@@ -184,7 +145,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
184
  gr.Markdown(
185
  "Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
186
  "The prediction is accompanied by an emoji in the output, and you can also view the audio's waveform. "
187
- "Your audio file and predicted emotion will be stored locally."
188
  )
189
 
190
  with gr.Tabs():
@@ -216,7 +177,6 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
216
  - Optional Noise Reduction.
217
  - Visualization of the audio waveform.
218
  - Emoji representation of the predicted emotion in the output.
219
- - Local storage of audio files and metadata (predicted emotion, timestamp).
220
 
221
  **Credits:**
222
  - [SpeechBrain](https://speechbrain.github.io)
 
10
  import matplotlib.pyplot as plt
11
  import librosa.display
12
  from PIL import Image # For image conversion
 
 
 
 
13
 
14
  # Try to import noisereduce (if not available, noise reduction will be skipped)
15
  try:
 
35
  emoji = emotion_to_emoji.get(label.lower(), "")
36
  return f"{label.capitalize()} {emoji}"
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # Load the pre-trained SpeechBrain classifier
39
  classifier = foreign_class(
40
  source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
 
110
  result = classifier.classify_file(temp_file)
111
  os.remove(temp_file)
112
  if isinstance(result, tuple) and len(result) > 3:
113
+ label = result[3][0] # Extract predicted emotion label from the tuple
114
  else:
115
  label = str(result)
116
  return add_emoji_to_label(label.lower())
 
134
  def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
135
  """
136
  Run emotion prediction and generate a waveform plot.
 
137
  Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
138
  """
139
  emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
140
  waveform = plot_waveform(audio_file)
 
 
 
141
  return emotion, waveform
142
 
143
  with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
 
145
  gr.Markdown(
146
  "Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
147
  "The prediction is accompanied by an emoji in the output, and you can also view the audio's waveform. "
148
+ "Use the options below to adjust ensemble prediction and noise reduction settings."
149
  )
150
 
151
  with gr.Tabs():
 
177
  - Optional Noise Reduction.
178
  - Visualization of the audio waveform.
179
  - Emoji representation of the predicted emotion in the output.
 
180
 
181
  **Credits:**
182
  - [SpeechBrain](https://speechbrain.github.io)