Mark0047 committed
Commit a094510 · verified · 1 Parent(s): 80d6d93

Update app.py

Files changed (1)
  1. app.py +25 -13
app.py CHANGED
@@ -1,25 +1,37 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
+import torch
+from datasets import load_dataset
 
-# Load the Hugging Face model
+# Load Whisper model and processor
+processor = WhisperProcessor.from_pretrained("openai/whisper-large")
+model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+
+# Load the Hugging Face emotion classifier
 emotion_classifier = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
 
-# Define a function to process the transcribed text with the emotion model
-def transcribe_and_analyze(audio):
-    # Load Whisper for transcription
-    whisper = gr.load("models/openai/whisper-large-v3-turbo")
-    transcription = whisper(audio)  # Transcribe audio
-    # Analyze emotions in the transcribed text
-    emotions = emotion_classifier(transcription["text"])
-    return transcription["text"], emotions
+# Define a function to process audio and analyze emotions
+def transcribe_and_analyze(audio_path):
+    # Load audio
+    dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+    audio = dataset[0]["audio"]["array"]
+
+    # Process audio with Whisper
+    input_features = processor(audio, return_tensors="pt").input_features
+    predicted_ids = model.generate(input_features)
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
+    # Analyze emotions in the transcription
+    emotions = emotion_classifier(transcription)
+    return transcription, emotions
 
 # Create Gradio interface
 interface = gr.Interface(
     fn=transcribe_and_analyze,
-    inputs=gr.Audio(type="filepath"),  # Accept audio input (fixed)
+    inputs=gr.Audio(type="filepath"),  # Accept audio input
     outputs=[
-        gr.Textbox(label="Transcription"),  # Show the transcription
-        gr.JSON(label="Emotion Analysis")  # Show the emotion analysis
+        gr.Textbox(label="Transcription"),  # Display transcription
+        gr.JSON(label="Emotion Analysis")  # Display emotion analysis
     ],
     title="Audio to Emotion Analysis"
 )
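Note that, as committed, transcribe_and_analyze receives audio_path from the gr.Audio(type="filepath") input but never uses it: it always transcribes the first sample of the hf-internal-testing/librispeech_asr_dummy dataset, and the processor call omits a sampling rate. Below is a minimal sketch (not part of this commit) of a version that transcribes the uploaded file instead; it assumes librosa is available in the Space, which the commit does not confirm.

import librosa

def transcribe_and_analyze(audio_path):
    # Load the uploaded file at 16 kHz mono, the rate Whisper expects
    # (librosa is an assumption; any resampling audio loader would do)
    audio, _ = librosa.load(audio_path, sr=16000)

    # Process audio with Whisper, passing the sampling rate explicitly
    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # Analyze emotions in the transcription
    emotions = emotion_classifier(transcription)
    return transcription, emotions

With this change the Gradio interface definition can stay exactly as in the commit, since the function signature and return values are unchanged.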