"""Gradio demo: transcribe Hindi speech with a fine-tuned Whisper model."""

import os

import gradio as gr
from transformers import pipeline

MODEL_ID = "HarshitJoshi/whisper-small-Hindi"

# Build the ASR pipeline once at import time so the model stays warm
# across requests instead of reloading per call.
pipe = pipeline("automatic-speech-recognition", model=MODEL_ID)


def transcribe_speech(filepath):
    """Return the Hindi transcription of the audio file at *filepath*.

    Long inputs are processed in 10-second chunks, batched 4 at a time.
    """
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "hindi",
        },
        chunk_length_s=10,
        batch_size=4,
    )
    return output["text"]


EXAMPLE_FOLDER = "./examples"

# Guard against a missing examples directory: the original os.listdir()
# call raised FileNotFoundError at import time and the app never launched.
if os.path.isdir(EXAMPLE_FOLDER):
    example_files = sorted(
        f for f in os.listdir(EXAMPLE_FOLDER) if f.endswith((".wav", ".mp3"))
    )
else:
    example_files = []


def handle_input(mic, upload, example):
    """Transcribe the first non-empty input among mic, upload, example.

    All three inputs arrive as file-path strings (``type="filepath"``);
    returns the transcription text, or a prompt if nothing was provided.
    """
    if mic is not None:
        return transcribe_speech(mic)
    if upload is not None:
        # With type="filepath" the value is already a path string,
        # so no ".name" attribute access is needed (or possible).
        return transcribe_speech(upload)
    # Truthiness check also rejects an empty-string dropdown selection.
    if example:
        return transcribe_speech(os.path.join(EXAMPLE_FOLDER, example))
    return "Please provide an input."


with gr.Blocks() as demo:
    with gr.Row():
        mic = gr.Audio(type="filepath", label="Record from Microphone")
        # NOTE(review): type="file" was removed from gr.Audio; "filepath"
        # is the supported equivalent and yields a plain path string.
        upload = gr.Audio(type="filepath", label="Upload an Audio File")
        example = gr.Dropdown(choices=example_files, label="Or Select an Example")
    output = gr.Textbox(label="Transcription")
    submit_btn = gr.Button("Transcribe")
    submit_btn.click(handle_input, inputs=[mic, upload, example], outputs=output)

demo.launch(debug=True)