rawanahmed committed
Commit f97e8dc · verified · 1 parent: ca2dbad

Upload 2 files

Files changed (2)
  1. app.py +37 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,37 @@
+ import gradio as gr
+ import librosa
+ import torch
+ from datasets import load_dataset
+ from transformers import SpeechT5ForSpeechToText, SpeechT5Processor
+
+ # Load the English subset of the VoxPopuli dataset
+ dataset = load_dataset("facebook/voxpopuli", "en")
+
+ # Example function to load audio and transcriptions
+ def get_sample(dataset):
+     # Get the first sample from the training set (change the index to pick any other sample)
+     sample = dataset["train"][0]
+     audio_file = sample["audio"]["path"]
+     transcription = sample["normalized_text"]  # VoxPopuli stores transcripts under "normalized_text"
+     return audio_file, transcription
+
+ # Initialize the SpeechT5 ASR model and processor
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_asr")
+ model = SpeechT5ForSpeechToText.from_pretrained("microsoft/speecht5_asr")
+
+ # Gradio interface function: takes the path of an uploaded audio file and returns its transcription
+ def transcribe(audio_path):
+     # Load the audio at the 16 kHz sampling rate the model expects
+     speech, _ = librosa.load(audio_path, sr=16000)
+     inputs = processor(audio=speech, sampling_rate=16000, return_tensors="pt")
+     # SpeechT5ForSpeechToText is a seq2seq model, so decode with generate() rather than raw logits
+     with torch.no_grad():
+         predicted_ids = model.generate(**inputs, max_length=200)
+     return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
+ # Load a sample to check that everything is set up
+ audio_file, transcription = get_sample(dataset)
+
+ # Set up the Gradio interface and launch it
+ iface = gr.Interface(fn=transcribe, inputs=gr.Audio(type="filepath"), outputs="text")
+ iface.launch()
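As a quick way to verify the setup, the model's output can be compared against the reference transcript of the VoxPopuli sample returned by get_sample. A minimal sketch, assuming the definitions from app.py above (dataset, get_sample, transcribe) are already loaded in an interactive session and run before iface.launch():

# Compare the SpeechT5 ASR output against the dataset's reference transcript
audio_file, reference = get_sample(dataset)   # first VoxPopuli training sample
prediction = transcribe(audio_file)           # model transcription for the same clip
print("Reference: ", reference)
print("Prediction:", prediction)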
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers==4.30.0
+ datasets==2.15.0
+ torch==2.1.0
+ librosa==0.10.0
+ soundfile==0.10.3.post1