Update app.py
app.py CHANGED
@@ -1,25 +1,37 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
+import torch
+from datasets import load_dataset
 
-# Load
+# Load Whisper model and processor
+processor = WhisperProcessor.from_pretrained("openai/whisper-large")
+model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+
+# Load the Hugging Face emotion classifier
 emotion_classifier = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
 
-# Define a function to process
-def transcribe_and_analyze(
-    # Load
-
-
-
-
-
+# Define a function to process audio and analyze emotions
+def transcribe_and_analyze(audio_path):
+    # Load audio
+    dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+    audio = dataset[0]["audio"]["array"]
+
+    # Process audio with Whisper
+    input_features = processor(audio, return_tensors="pt").input_features
+    predicted_ids = model.generate(input_features)
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
+    # Analyze emotions in the transcription
+    emotions = emotion_classifier(transcription)
+    return transcription, emotions
 
 # Create Gradio interface
 interface = gr.Interface(
     fn=transcribe_and_analyze,
-    inputs=gr.Audio(type="filepath"), # Accept audio input
+    inputs=gr.Audio(type="filepath"), # Accept audio input
     outputs=[
-        gr.Textbox(label="Transcription"), #
-        gr.JSON(label="Emotion Analysis") #
+        gr.Textbox(label="Transcription"), # Display transcription
+        gr.JSON(label="Emotion Analysis") # Display emotion analysis
     ],
     title="Audio to Emotion Analysis"
 )
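
A note on the new version: as committed, transcribe_and_analyze never reads its audio_path argument. It always transcribes the first clip of the hf-internal-testing/librispeech_asr_dummy dataset, so every upload returns the same result; the processor call also omits sampling_rate, and the script never calls interface.launch(), so the app has nothing to serve. Below is a minimal sketch of a version that transcribes the actual upload. The use of librosa for decoding and resampling is an assumption, not something this commit ships.

import gradio as gr
import librosa  # assumption: used here to decode the uploaded file; not in the commit
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration

# Load Whisper model and processor
processor = WhisperProcessor.from_pretrained("openai/whisper-large")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

# Load the Hugging Face emotion classifier
emotion_classifier = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)

def transcribe_and_analyze(audio_path):
    # Decode the uploaded file and resample to 16 kHz, the rate Whisper expects
    audio, _ = librosa.load(audio_path, sr=16000)

    # Transcribe with Whisper
    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # Score the transcription against all 28 GoEmotions labels
    emotions = emotion_classifier(transcription)
    return transcription, emotions

interface = gr.Interface(
    fn=transcribe_and_analyze,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Transcription"), gr.JSON(label="Emotion Analysis")],
    title="Audio to Emotion Analysis",
)

interface.launch()

One design note: openai/whisper-large is a heavy checkpoint for a CPU Space; a smaller checkpoint such as openai/whisper-small keeps the same code path with far less load time and memory.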