from transformers import pipeline
import gradio as gr
import os

model_id = "HarshitJoshi/whisper-small-Hindi"
pipe = pipeline("automatic-speech-recognition", model=model_id)


def transcribe_speech(audio):
    # Chunked inference (chunk_length_s) lets clips longer than Whisper's
    # 30-second window be transcribed; batch_size controls how many chunks
    # are decoded at once.
    output = pipe(
        audio,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "hindi",
        },
        chunk_length_s=10,
        batch_size=4,
    )
    return output["text"]


# Collect the bundled audio clips shown in the "Examples" tab.
example_folder = "./examples"
example_files = [
    f for f in os.listdir(example_folder)
    if f.endswith(".wav") or f.endswith(".mp3")
]


def play_and_transcribe(filename):
    filepath = os.path.join(example_folder, filename)
    transcription = transcribe_speech(filepath)
    return filepath, transcription


with gr.Blocks() as demo:
    gr.Markdown("# Hindi Speech Transcription")

    with gr.Tab("Transcribe"):
        # NOTE: `source=` is the Gradio 3.x argument name; Gradio 4+
        # renamed it to `sources=["microphone"]`.
        audio_input = gr.Audio(source="microphone", type="filepath", label="Audio Input")
        upload_button = gr.UploadButton("Upload Audio", file_types=["audio"])
        transcribe_button = gr.Button("Transcribe")
        output_text = gr.Textbox(label="Transcription")

        transcribe_button.click(
            fn=transcribe_speech,
            inputs=audio_input,
            outputs=output_text,
        )
        # Route an uploaded file into the audio component via its temp-file path.
        upload_button.upload(
            fn=lambda file: file.name,
            inputs=upload_button,
            outputs=audio_input,
        )

    with gr.Tab("Examples"):
        example_dropdown = gr.Dropdown(choices=example_files, label="Select an example")
        example_audio = gr.Audio(label="Audio Playback")
        example_transcription = gr.Textbox(label="Transcription")

        # Selecting an example both plays the clip and shows its transcription.
        example_dropdown.change(
            fn=play_and_transcribe,
            inputs=example_dropdown,
            outputs=[example_audio, example_transcription],
        )

demo.launch(debug=True)
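
# --- Optional sanity check (an addition, not part of the original demo) ---
# A minimal sketch for exercising the pipeline without the UI, assuming a
# clip exists in the examples folder; "sample.wav" is a hypothetical
# placeholder filename. Note this line only runs after the Gradio server
# is shut down, because launch(debug=True) blocks.
sample_path = os.path.join(example_folder, "sample.wav")
if os.path.exists(sample_path):
    print(transcribe_speech(sample_path))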