Spaces:
Runtime error
Runtime error
File size: 1,947 Bytes
fc286f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import gradio as gr
from transformers import pipeline
import librosa
import numpy as np
import matplotlib.pyplot as plt
# Build the speech-recognition pipeline once at import time so every request
# handled by analyze_audio reuses the same loaded Whisper checkpoint.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
def analyze_audio(audio):
    """Transcribe an audio file and compute simple prosodic statistics.

    Args:
        audio: Path to the audio file to analyse. ``None`` or an empty
            string (no recording supplied) yields an "empty" result
            instead of raising.

    Returns:
        A 5-tuple ``(transcription, tempo, pace, pitch_variance,
        pitch_plot_path)``:

        * transcription: text produced by the module-level ``transcriber``.
        * tempo: ``librosa.beat.beat_track`` tempo estimate as a float.
        * pace: whitespace-separated words per second of audio. This is a
          rough stand-in for syllables per second; 0.0 for zero-length audio.
        * pitch_variance: variance of the YIN f0 track as a float.
        * pitch_plot_path: path to a PNG of the pitch contour.

        When ``audio`` is empty the result is ``("", None, None, None, None)``.
    """
    import tempfile

    # A cleared or missing input arrives as None; there is nothing to
    # analyse, so return a blank value for every output slot.
    if not audio:
        return "", None, None, None, None

    # Convert audio to text using Whisper.
    transcription = transcriber(audio)["text"]

    # Load the waveform at its native sample rate.
    y, sr = librosa.load(audio, sr=None)

    # Extract prosodic features. `sr` must be passed explicitly: the file was
    # loaded at its native rate, while librosa.yin otherwise assumes 22050 Hz
    # and would scale every frequency estimate incorrectly.
    frame_length = 2048
    hop_length = frame_length // 4
    pitch = librosa.yin(
        y,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version, tempo is a scalar or a 1-element
    # array; normalise to a plain float for the numeric output.
    tempo = float(np.asarray(tempo).ravel()[0])

    pitch_variance = float(np.var(pitch))

    # Estimate speaking pace from the word count (simplified: one unit per
    # whitespace-separated token), guarding against zero-length audio.
    word_count = len(transcription.split())
    duration = librosa.get_duration(y=y, sr=sr)
    pace = word_count / duration if duration > 0 else 0.0

    # Write the plot to a unique file so overlapping requests do not
    # overwrite each other's image. The file is intentionally left on disk
    # (delete=False) because the caller receives its path.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        pitch_plot_path = tmp.name

    # Plot pitch against time in seconds (frame index * hop / sample rate).
    frame_times = np.arange(len(pitch)) * hop_length / sr
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(frame_times, pitch, label='Pitch')
        ax.set_xlabel('Time (s)')
        ax.set_ylabel('Frequency (Hz)')
        ax.set_title('Pitch Over Time')
        ax.legend()
        fig.savefig(pitch_plot_path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    return transcription, tempo, pace, pitch_variance, pitch_plot_path
# Assemble the Gradio UI: one audio input feeding analyze_audio, and one
# output component per element of the tuple that analyze_audio returns.
input_audio = gr.Audio(type="filepath", label="Input Audio")

result_components = [
    gr.Textbox(label="Transcription"),
    gr.Number(label="Tempo (BPM)"),
    gr.Number(label="Speaking Pace (syllables/sec)"),
    gr.Number(label="Pitch Variance"),
    gr.Image(label="Pitch Contour Plot"),
]

iface = gr.Interface(
    fn=analyze_audio,
    inputs=input_audio,
    outputs=result_components,
    live=True,
)

# Start the app without requesting a public share link.
iface.launch(share=False)
|