import os

import gradio as gr
from transformers import pipeline

# Fetch the token from the environment
hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")

model_id = "akadriu/whisper-medium-sq"  # update with your model id
# model_id = "./"

pipe = pipeline("automatic-speech-recognition", model=model_id, token=hf_token)


def transcribe_speech(filepath):
    # Check that an audio file was actually provided
    if filepath is None:
        raise ValueError("No audio file provided.")

    # Perform speech transcription
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "albanian",
        },
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


# Create Gradio interfaces without the deprecated 'source' argument.
# (In Gradio 4+, gr.Audio(sources=["microphone"], type="filepath") can restrict
# the first tab to microphone input; as written, both tabs accept any audio.)
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

demo = gr.Blocks()

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch(debug=True)