Depreesion

Runtime error

App Files Files Community

Depreesion / tabs /speech_emotion_recognition.py

vitorcalvi

pre-launch

fc286f6 12 months ago

raw

history blame

2.36 kB

	import gradio as gr
	import numpy as np
	import librosa
	import librosa.display
	import matplotlib.pyplot as plt
	from transformers import pipeline

	# Both Hugging Face pipelines are constructed once, at import time, and are
	# then reused by every call to process_audio.

	# Audio classifier whose top-ranked label is reported as the emotion.
	emotion_model = pipeline(
	    task="audio-classification",
	    model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
	)

	# Speech-to-text model that supplies the transcription.
	transcription_model = pipeline(
	    task="automatic-speech-recognition",
	    model="facebook/wav2vec2-base-960h",
	)

	# Lookup table from a lower-case emotion label to the
	# (arousal, dominance, valence) triple that process_audio unpacks in that
	# order. Labels that are not listed fall back to (0, 0, 0) at the lookup site.
	emotion_mapping = {
	    "angry": (0.8, 0.8, -0.5),
	    "happy": (0.6, 0.6, 0.8),
	    "sad": (-0.6, -0.4, -0.6),
	    "neutral": (0, 0, 0),
	    "fear": (0.3, -0.3, -0.7),
	    # The configured classifier's model card lists this label as "fearful";
	    # without the alias the lookup silently returned the (0, 0, 0) default.
	    "fearful": (0.3, -0.3, -0.7),
	    "surprise": (0.4, 0.2, 0.2),
	    # Same reason as "fearful": the classifier's label is "surprised".
	    "surprised": (0.4, 0.2, 0.2),
	    "disgust": (0.2, 0.5, -0.6),
	    "calm": (-0.2, 0.1, 0.3),
	    "excited": (0.7, 0.5, 0.7),
	    "frustrated": (0.6, 0.5, -0.4),
	}

	def _plot_waveform(y, sr):
	    """Return a matplotlib figure titled "Waveform" drawn from samples *y*."""
	    # Explicit Figure/Axes instead of pyplot's implicit "current figure", so
	    # the drawing does not depend on global pyplot state.
	    fig, ax = plt.subplots(figsize=(10, 4))
	    librosa.display.waveshow(y, sr=sr, ax=ax)
	    ax.set_title("Waveform")
	    # Deregister from pyplot so figures do not accumulate across calls; the
	    # Figure object itself is still returned, as in the original code.
	    plt.close(fig)
	    return fig


	def _plot_mel_spectrogram(y, sr):
	    """Return a matplotlib figure of the dB-scaled mel spectrogram of *y*."""
	    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
	    fig, ax = plt.subplots(figsize=(10, 4))
	    mesh = librosa.display.specshow(
	        librosa.power_to_db(mel_spec, ref=np.max),
	        sr=sr,
	        x_axis='time',
	        y_axis='mel',
	        ax=ax,
	    )
	    fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
	    ax.set_title("Mel Spectrogram")
	    plt.close(fig)
	    return fig


	def process_audio(audio_file):
	    """Transcribe an audio file, classify its emotion and plot it.

	    Args:
	        audio_file: Path of the audio file to analyse. A falsy value
	            (``None`` or ``""``) means "no audio".

	    Returns:
	        An 8-tuple ``(transcription, emotion, confidence, arousal,
	        dominance, valence, waveform_plot, mel_spec_plot)``. With no audio
	        the two texts are empty strings and every other item is ``None``.
	    """
	    # NOTE(review): the change event presumably also fires with None when
	    # the audio component is cleared; without this guard librosa.load(None)
	    # raised in that case.
	    if not audio_file:
	        return "", "", None, None, None, None, None, None

	    # sr=None keeps the file's native sampling rate for the plots.
	    y, sr = librosa.load(audio_file, sr=None)
	    transcription = transcription_model(audio_file)["text"]

	    # Only the first entry of the classifier output is used.
	    emotion_result = emotion_model(audio_file)[0]
	    emotion, confidence = emotion_result["label"], emotion_result["score"]
	    # Labels missing from emotion_mapping fall back to a (0, 0, 0) triple.
	    arousal, dominance, valence = emotion_mapping.get(emotion.lower(), (0, 0, 0))

	    waveform_plot = _plot_waveform(y, sr)
	    mel_spec_plot = _plot_mel_spectrogram(y, sr)

	    return transcription, emotion, confidence, arousal, dominance, valence, waveform_plot, mel_spec_plot

	def create_emotion_recognition_tab():
	    """Lay out the speech-emotion widgets and hook them up to process_audio.

	    One row holds three columns: the audio input (with an example clip) plus
	    the transcription and emotion text boxes, then the four numeric
	    read-outs, then the two plots. Any change of the audio input runs
	    process_audio and fills all eight output components.

	    NOTE(review): presumably called inside an active Gradio Blocks/Tab
	    context by the application; that caller is not visible here.
	    """
	    number_labels = ("Confidence", "Arousal", "Dominance", "Valence")
	    plot_labels = ("Waveform", "Mel Spectrogram")

	    with gr.Row():
	        with gr.Column(scale=2):
	            audio_input = gr.Audio(type="filepath")
	            gr.Examples(["./assets/audio/fitness.wav"], inputs=[audio_input])
	            transcription_output = gr.Textbox(label="Transcription")
	            emotion_output = gr.Textbox(label="Emotion")
	        with gr.Column(scale=1):
	            number_boxes = [gr.Number(label=name) for name in number_labels]
	        with gr.Column(scale=1):
	            plot_panels = [gr.Plot(label=name) for name in plot_labels]

	    # Component order must match the tuple order returned by process_audio.
	    result_components = [
	        transcription_output,
	        emotion_output,
	        *number_boxes,
	        *plot_panels,
	    ]
	    audio_input.change(
	        process_audio,
	        inputs=[audio_input],
	        outputs=result_components,
	    )