Spaces:
Sleeping
Sleeping
File size: 1,862 Bytes
2327692 a094510 0759a7f daebcf8 de9f399 2327692 a094510 80d6d93 2327692 a094510 de9f399 daebcf8 f5d0beb daebcf8 6203c8d a094510 3c4b71e 1b66efd 6203c8d 1b66efd 666588f a094510 f5d0beb a094510 2327692 a094510 2327692 a094510 2327692 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
import torch
import torchaudio
import soundfile as sf
# Load Whisper model and processor.
# NOTE(review): "openai/whisper-large" is the original v1 checkpoint; newer
# revisions (large-v2/v3) exist — confirm this pin is intentional.
processor = WhisperProcessor.from_pretrained("openai/whisper-large")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
# Load the Hugging Face emotion classifier.
# top_k=None returns scores for ALL emotion labels, not just the best one.
emotion_classifier = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
# Define a function to process audio and analyze emotions
def transcribe_and_analyze(audio_path):
    """Transcribe an audio file with Whisper and classify emotions in the text.

    Parameters
    ----------
    audio_path : str
        Filesystem path to the audio file (supplied by the Gradio Audio
        component with type="filepath").

    Returns
    -------
    tuple
        (transcription, emotions) where transcription is the decoded text and
        emotions is the emotion-classifier output (all label scores, since the
        pipeline was built with top_k=None).
    """
    # Load audio: numpy array of shape (frames,) for mono or
    # (frames, channels) for multi-channel files.
    audio, sample_rate = sf.read(audio_path)
    # Downmix multi-channel audio to mono — Whisper's feature extractor
    # expects a 1-D signal; the original code broke on stereo input.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    # Resample to the 16 kHz rate Whisper was trained on, if necessary.
    # Cast to float32: soundfile yields float64, which resample/Whisper
    # do not expect.
    if sample_rate != 16000:
        audio_tensor = torchaudio.functional.resample(
            torch.tensor(audio, dtype=torch.float32),
            orig_freq=sample_rate,
            new_freq=16000,
        )
        audio = audio_tensor.numpy()
    # Extract log-mel input features and decode with Whisper.
    input_features = processor(audio, sampling_rate=16000, return_tensors="pt")
    predicted_ids = model.generate(input_features.input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    # Classify emotions in the transcribed text.
    emotions = emotion_classifier(transcription)
    return transcription, emotions
# Create Gradio interface: one audio input, two outputs (the transcription
# text and the raw emotion-score structure rendered as JSON).
interface = gr.Interface(
    fn=transcribe_and_analyze,
    inputs=gr.Audio(type="filepath"),  # passes a temp-file path to the handler
    outputs=[
        gr.Textbox(label="Transcription"),  # Display transcription
        gr.JSON(label="Emotion Analysis")  # Display emotion analysis
    ],
    title="Audio to Emotion Analysis"
)
# Launch the Gradio app at import/run time (standard for a Hugging Face Space,
# where this module is executed as the app entry point).
interface.launch()
|