Spaces:
Runtime error
Runtime error
import gradio as gr | |
import numpy as np | |
import librosa | |
import librosa.display | |
import matplotlib.pyplot as plt | |
from transformers import pipeline | |
# Hugging Face pipelines are built once at module import so that every call to
# process_audio reuses the same model objects instead of re-creating them.
# NOTE(review): construction presumably fetches/loads model weights, so import
# of this module can be slow -- confirm against the deployment environment.

# Speech-emotion classifier; process_audio reads "label" and "score" from its
# first result.
emotion_model = pipeline(
    task="audio-classification",
    model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
)

# Speech-to-text model; process_audio reads the "text" field of its result.
transcription_model = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
# Lookup table from a lower-cased emotion label to three numeric scores.
# process_audio unpacks each tuple as (arousal, dominance, valence) and falls
# back to (0, 0, 0) for any label that is not listed here.
emotion_mapping = {
    "angry": (0.8, 0.8, -0.5),
    "happy": (0.6, 0.6, 0.8),
    "sad": (-0.6, -0.4, -0.6),
    "neutral": (0, 0, 0),
    "fear": (0.3, -0.3, -0.7),
    "surprise": (0.4, 0.2, 0.2),
    "disgust": (0.2, 0.5, -0.6),
    "calm": (-0.2, 0.1, 0.3),
    "excited": (0.7, 0.5, 0.7),
    "frustrated": (0.6, 0.5, -0.4),
}
def _plot_waveform(y, sr):
    """Return a Matplotlib figure of the time-domain waveform of *y*."""
    # Explicit fig/ax objects avoid relying on pyplot's implicit "current
    # figure", which is shared module state.
    fig, ax = plt.subplots(figsize=(10, 4))
    librosa.display.waveshow(y, sr=sr, ax=ax)
    ax.set_title("Waveform")
    # Detach from pyplot's figure registry so figures do not accumulate
    # across calls; the Figure object itself is still returned to the caller.
    plt.close(fig)
    return fig


def _plot_mel_spectrogram(y, sr):
    """Return a Matplotlib figure of the dB-scaled mel spectrogram of *y*."""
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
    fig, ax = plt.subplots(figsize=(10, 4))
    mesh = librosa.display.specshow(
        librosa.power_to_db(mel_spec, ref=np.max),
        sr=sr,
        x_axis="time",
        y_axis="mel",
        ax=ax,
    )
    fig.colorbar(mesh, ax=ax, format="%+2.0f dB")
    ax.set_title("Mel Spectrogram")
    plt.close(fig)
    return fig


def process_audio(audio_file):
    """Transcribe an audio file, classify its emotion and plot it.

    Args:
        audio_file: Path to the audio file to analyse. ``None`` or an empty
            string (e.g. when the input widget has been cleared) is accepted
            and yields empty outputs instead of raising.

    Returns:
        An 8-tuple ``(transcription, emotion, confidence, arousal, dominance,
        valence, waveform_plot, mel_spec_plot)``. The two plots are Matplotlib
        figures. When no audio is supplied the tuple is
        ``("", "", None, None, None, None, None, None)``.
    """
    # The UI's change event also fires when the audio is removed, in which
    # case there is nothing to load; return blank values for every output.
    if not audio_file:
        return "", "", None, None, None, None, None, None

    # sr=None keeps the file's native sampling rate for the plots.
    y, sr = librosa.load(audio_file, sr=None)

    transcription = transcription_model(audio_file)["text"]

    # Only the first entry of the classifier output is used.
    emotion_result = emotion_model(audio_file)[0]
    emotion, confidence = emotion_result["label"], emotion_result["score"]

    # Labels missing from the table fall back to all-zero scores.
    arousal, dominance, valence = emotion_mapping.get(emotion.lower(), (0, 0, 0))

    waveform_plot = _plot_waveform(y, sr)
    mel_spec_plot = _plot_mel_spectrogram(y, sr)

    return (
        transcription,
        emotion,
        confidence,
        arousal,
        dominance,
        valence,
        waveform_plot,
        mel_spec_plot,
    )
def create_emotion_recognition_tab():
    """Build the emotion-recognition UI and wire it to ``process_audio``.

    Lays out one row with three columns: the audio input (with one example
    file) plus transcription and emotion text boxes; four numeric read-outs;
    and two plot panels. Whenever the audio input changes, ``process_audio``
    is run and its eight return values fill the outputs in order.

    NOTE(review): presumably called inside an active ``gr.Blocks`` context --
    confirm against the caller. The column nesting below was reconstructed
    from source whose indentation had been lost; verify against the intended
    layout.
    """
    number_labels = ["Confidence", "Arousal", "Dominance", "Valence"]
    plot_labels = ["Waveform", "Mel Spectrogram"]

    with gr.Row():
        # Wide left column: input and textual results.
        with gr.Column(scale=2):
            audio_in = gr.Audio(type="filepath")
            gr.Examples(["./assets/audio/fitness.wav"], inputs=[audio_in])
            transcript_box = gr.Textbox(label="Transcription")
            emotion_box = gr.Textbox(label="Emotion")

        # Middle column: numeric scores.
        with gr.Column(scale=1):
            number_fields = [gr.Number(label=text) for text in number_labels]

        # Right column: figures.
        with gr.Column(scale=1):
            plot_panels = [gr.Plot(label=text) for text in plot_labels]

    # Output order must match the tuple returned by process_audio.
    audio_in.change(
        process_audio,
        inputs=[audio_in],
        outputs=[transcript_box, emotion_box, *number_fields, *plot_panels],
    )