# Hugging Face Spaces app: speech-to-text transcription with Distil-Whisper and Gradio.
# --- Module setup: imports, ASR pipeline, and the shared Blocks app. ---
import os

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline

# Distil-Whisper "small.en": a distilled, English-only Whisper checkpoint —
# fast enough for CPU inference in a demo Space.
asr = pipeline(task="automatic-speech-recognition",
               model="distil-whisper/distil-small.en")

demo = gr.Blocks()
def transcribe_speech(filepath):
    """Transcribe an audio file by splitting it into 30-second chunks.

    Long recordings are cut into 30 s pieces, each piece is transcribed
    by the module-level `asr` pipeline, and the partial texts are joined.

    Args:
        filepath: Path to the recorded/uploaded audio file, or None when
            Gradio received no audio.

    Returns:
        The full transcription as a stripped string ("" when no audio).
    """
    import tempfile  # stdlib; local import keeps this fix self-contained

    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""

    audio = AudioSegment.from_file(filepath)
    chunk_length_ms = 30 * 1000  # 30 seconds per chunk

    pieces = []
    # Export chunks into a private temporary directory instead of fixed
    # names in the CWD: concurrent requests no longer clobber each other's
    # files, and cleanup happens even if transcription raises.
    with tempfile.TemporaryDirectory() as tmpdir:
        for start in range(0, len(audio), chunk_length_ms):
            chunk_path = os.path.join(tmpdir, f"chunk_{start}.wav")
            audio[start:start + chunk_length_ms].export(chunk_path,
                                                        format="wav")
            pieces.append(asr(chunk_path)["text"])

    return " ".join(pieces).strip()
# Interface for live recording from the user's microphone.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)
# Interface for transcribing an uploaded audio file.
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)
# Expose both interfaces as tabs on the shared Blocks app.
with demo:
    gr.TabbedInterface(
        interface_list=[mic_transcribe, file_transcribe],
        tab_names=["Transcribe Microphone", "Transcribe Audio File"],
    )

# Honor a platform-provided PORT (e.g. hosted environments); default 7860.
server_port = int(os.environ.get("PORT", 7860))
demo.launch(share=True, server_port=server_port)