"""Gradio demo that transcribes speech with the `whisper-small-Hindi` model.

The UI offers three mutually exclusive inputs (microphone recording, file
upload, bundled example clip) and shows the transcription in a textbox.
"""

import os

import gradio as gr
from transformers import pipeline

model_id = "HarshitJoshi/whisper-small-Hindi"
pipe = pipeline("automatic-speech-recognition", model=model_id)

example_folder = "./examples"

# Extensions (lower-case) that qualify a file as a selectable example.
_AUDIO_EXTENSIONS = (".wav", ".mp3")


def _list_example_files(folder):
    """Return the sorted audio file names found directly inside *folder*.

    A missing folder yields an empty list so the app can still start
    without bundled examples instead of crashing at import time.
    """
    try:
        names = os.listdir(folder)
    except (FileNotFoundError, NotADirectoryError):
        return []
    return sorted(
        name for name in names if name.lower().endswith(_AUDIO_EXTENSIONS)
    )


example_files = _list_example_files(example_folder)


def transcribe_speech(source, source_type):
    """Transcribe one audio input and return the recognized text.

    Args:
        source: The audio to transcribe. For "Microphone" this is used as
            the file path as-is; for "Upload" it may be either a path or an
            object exposing the path as ``.name``; for "Example" it is a
            file name relative to ``example_folder``.
        source_type: One of "Microphone", "Upload" or "Example".

    Returns:
        The ``"text"`` field of the pipeline output.

    Raises:
        ValueError: If ``source_type`` is not one of the supported values.
    """
    if source_type == "Microphone":
        filepath = source
    elif source_type == "Upload":
        # Accept both a temp-file object (legacy `type="file"`) and a plain
        # path string (`type="filepath"`).
        filepath = getattr(source, "name", source)
    elif source_type == "Example":
        # Keep only the final path component so a crafted value cannot
        # reach outside the examples folder.
        filepath = os.path.join(example_folder, os.path.basename(source))
    else:
        raise ValueError(f"Unsupported source_type: {source_type!r}")

    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "hindi",
        },
        chunk_length_s=10,
        batch_size=4,
    )
    return output["text"]


def handle_input(mic, upload, example):
    """Dispatch the first provided input to `transcribe_speech`.

    Priority is microphone, then upload, then example. When nothing is
    provided, a prompt message is returned instead of a transcription.
    """
    if mic is not None:
        return transcribe_speech(mic, "Microphone")
    if upload is not None:
        return transcribe_speech(upload, "Upload")
    # Truthiness also rejects an empty dropdown value, not just None.
    if example:
        return transcribe_speech(example, "Example")
    return "Please provide an input."


with gr.Blocks() as demo:
    mic = gr.Audio(
        source="microphone",
        type="filepath",
        label="Record from Microphone",
    )
    # "filepath" replaces the deprecated "file" type; `transcribe_speech`
    # handles either representation.
    upload = gr.Audio(
        source="upload",
        type="filepath",
        label="Upload an Audio File",
    )
    example = gr.Dropdown(choices=example_files, label="Or Select an Example")
    output = gr.Textbox(label="Transcription")
    submit_btn = gr.Button("Transcribe")

    submit_btn.click(
        handle_input,
        inputs=[mic, upload, example],
        outputs=output,
    )

demo.launch(debug=True)