import gradio as gr
from transformers import pipeline
import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Lazily-created ASR pipeline, shared across calls. The original code rebuilt
# the pipeline (reloading whisper-small weights) on every request, which is
# very slow; caching it here loads the model exactly once.
_ASR_PIPELINE = None


def transcribe(audio):
    """Transcribe an audio file to text using the Whisper-small model.

    Args:
        audio: Path to an audio file (Gradio's ``gr.Audio(type="filepath")``
            passes a filepath string).

    Returns:
        str: The transcribed text.
    """
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        # First call only: build the ASR pipeline on the module-level device
        # ("cuda:0" when available, else "cpu").
        _ASR_PIPELINE = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            chunk_length_s=30,
            device=device,
        )

    prediction = _ASR_PIPELINE(audio)["text"]
    print(prediction)

    return prediction
    
# Wire the web UI: one audio-upload input feeding the transcriber, one
# read-only textbox showing the result.
_audio_input = gr.Audio(type="filepath")
_text_output = gr.Textbox(label="Result")

gradio_app = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs=_text_output,
    title="Transcribed",
)

# Launch only when executed as a script; share=True requests a public URL.
if __name__ == "__main__":
    gradio_app.launch(share=True)