import gradio as gr
import torch
from faster_whisper import WhisperModel

# Prefer GPU with half precision when CUDA is present; fall back to CPU
# with full precision. Reuse `device` rather than probing CUDA twice.
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "float32"

# Load the faster-whisper model once at module import so every request
# reuses the same weights instead of reloading per call.
model = WhisperModel("tiny", device=device, compute_type=compute_type)


def transcribe(audio):
    """Transcribe an audio recording to text (Yoruba decoding forced).

    Parameters
    ----------
    audio : str | None
        Filesystem path to the recording handed over by the Gradio
        ``Audio`` component (``type="filepath"``). With ``live=True``
        Gradio also fires the callback with ``None`` before any audio
        exists or after the input is cleared, so that case must not crash.

    Returns
    -------
    str
        Space-joined text of all transcribed segments; empty string when
        no audio has been provided yet.
    """
    # Guard: live mode invokes this with None — return empty text instead
    # of letting model.transcribe raise on a missing path.
    if audio is None:
        return ""
    # language="yo" pins decoding to Yoruba instead of auto-detection.
    segments, _ = model.transcribe(audio, language="yo")
    # segments is a lazy generator; joining consumes it in one pass.
    return " ".join(segment.text for segment in segments)


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    live=True,
    title="Speech-to-Text Demo",
    description="Transcribe speech to text using the Whisper model.",
)

if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link.
    iface.launch(share=True)