Update app.py
app.py CHANGED
@@ -1,25 +1,37 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
+import torch
+from datasets import load_dataset
 
-# Load
+# Load Whisper model and processor
+processor = WhisperProcessor.from_pretrained("openai/whisper-large")
+model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+
+# Load the Hugging Face emotion classifier
 emotion_classifier = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
 
-# Define a function to process
-def transcribe_and_analyze(
-    # Load
-
-
-
-
-
+# Define a function to process audio and analyze emotions
+def transcribe_and_analyze(audio_path):
+    # Load audio
+    dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+    audio = dataset[0]["audio"]["array"]
+
+    # Process audio with Whisper
+    input_features = processor(audio, return_tensors="pt").input_features
+    predicted_ids = model.generate(input_features)
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
+    # Analyze emotions in the transcription
+    emotions = emotion_classifier(transcription)
+    return transcription, emotions
 
 # Create Gradio interface
 interface = gr.Interface(
     fn=transcribe_and_analyze,
-    inputs=gr.Audio(type="filepath"), # Accept audio input
+    inputs=gr.Audio(type="filepath"), # Accept audio input
     outputs=[
-        gr.Textbox(label="Transcription"), #
-        gr.JSON(label="Emotion Analysis") #
+        gr.Textbox(label="Transcription"), # Display transcription
+        gr.JSON(label="Emotion Analysis") # Display emotion analysis
     ],
     title="Audio to Emotion Analysis"
 )
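
A note on the new version: as committed, transcribe_and_analyze never reads its audio_path argument. It always transcribes the first clip of the hf-internal-testing/librispeech_asr_dummy dataset, so every upload returns the same result; the processor call also omits sampling_rate, and the script never calls interface.launch(), so the app has nothing to serve. Below is a minimal sketch of a version that transcribes the actual upload. The use of librosa for decoding and resampling is an assumption, not something this commit ships.

import gradio as gr
import librosa  # assumption: used here to decode the uploaded file; not in the commit
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration

# Load Whisper model and processor
processor = WhisperProcessor.from_pretrained("openai/whisper-large")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

# Load the Hugging Face emotion classifier
emotion_classifier = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)

def transcribe_and_analyze(audio_path):
    # Decode the uploaded file and resample to 16 kHz, the rate Whisper expects
    audio, _ = librosa.load(audio_path, sr=16000)

    # Transcribe with Whisper
    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # Score the transcription against all 28 GoEmotions labels
    emotions = emotion_classifier(transcription)
    return transcription, emotions

interface = gr.Interface(
    fn=transcribe_and_analyze,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Transcription"), gr.JSON(label="Emotion Analysis")],
    title="Audio to Emotion Analysis",
)

interface.launch()

One design note: openai/whisper-large is a heavy checkpoint for a CPU Space; a smaller checkpoint such as openai/whisper-small keeps the same code path with far less load time and memory.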