import os

import gradio as gr
import torch
from transformers import (
    WhisperForConditionalGeneration,
    WhisperProcessor,
    WhisperTokenizer,
    pipeline,
)

# Use half precision on GPU, full precision on CPU.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model_id = "sudoping01/whosper-large"

# Load the model. The dropout overrides are config values and have no effect
# at inference time (the pipeline runs the model in eval mode).
model = WhisperForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch_dtype,
    use_cache=True,
    attention_dropout=0.1,
    dropout=0.1,
    token=os.environ.get("HF_TOKEN"),
)

# Decoding defaults; generate_kwargs passed at call time take precedence.
model.config.suppress_tokens = []
model.config.no_repeat_ngram_size = 3
model.config.early_stopping = True
model.config.max_length = 448
model.config.num_beams = 5

# Pass the token here as well, in case the model repository is gated.
tokenizer = WhisperTokenizer.from_pretrained(model_id, token=os.environ.get("HF_TOKEN"))
processor = WhisperProcessor.from_pretrained(model_id, token=os.environ.get("HF_TOKEN"))

# Chunked long-form transcription: 30 s windows with 3 s of overlap.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    chunk_length_s=30,
    stride_length_s=3,
    return_timestamps=False,
    batch_size=1,
)


def transcribe(audio):
    """Transcribe an audio file path; return the text or an error message."""
    if audio is None:
        return "Please provide an audio input."
    try:
        result = pipe(
            audio,
            generate_kwargs={
                # Deterministic beam search; temperature is ignored when
                # do_sample is False but is kept explicit for clarity.
                "temperature": 0.0,
                "do_sample": False,
                "num_beams": 5,
                "length_penalty": 1.0,
                "repetition_penalty": 1.2,
            },
        )
        return result["text"]
    except Exception as e:
        return f"Error during transcription: {e}"


# Create the Gradio interface.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
    title="Multilingual Speech Recognition: Wolof, French, English, or a Mix",
    description=(
        "Upload an audio file or record from the microphone to transcribe "
        "Wolof, French, or English speech, including mixed-language audio."
    ),
    theme="default",
)

if __name__ == "__main__":
    demo.launch()
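# --- Optional extras (sketches; not part of the original app) ---
# Quick smoke test without the UI; "sample.wav" is a hypothetical local path:
#
#     print(transcribe("sample.wav"))
#
# For container deployments (e.g. a Hugging Face Space), binding to all
# interfaces on a fixed port is common; server_name and server_port are
# standard gradio launch() parameters, and the port value is an assumption:
#
#     demo.launch(server_name="0.0.0.0", server_port=7860)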