import gradio as gr
import whisper
import librosa
import numpy as np

# Load the Whisper ASR model once at import time so every request reuses it.
# "tiny" trades accuracy for speed/memory; swap for "base"/"small" if needed.
model = whisper.load_model("tiny")

# Chunking function to split the audio into smaller parts (e.g., 5-second chunks)
def chunk_audio(audio_file, chunk_size=5):
    """Split an audio file into consecutive fixed-length chunks.

    Parameters
    ----------
    audio_file : str
        Path to the audio file to load.
    chunk_size : int or float, optional
        Length of each chunk in seconds (default 5). Fractional values
        are supported.

    Returns
    -------
    tuple[list[np.ndarray], int]
        The list of sample arrays (the final chunk may be shorter than
        ``chunk_size``) and the sample rate used.

    Raises
    ------
    ValueError
        If ``chunk_size`` is too small to contain at least one sample.
    """
    # Resample to 16 kHz mono — the rate Whisper models expect.
    audio, sr = librosa.load(audio_file, sr=16000)

    # Convert the chunk length to a whole number of samples up front.
    # The previous float arithmetic (num_chunks * chunk_size * sr) produced
    # a float slice index and raised TypeError for non-integer chunk sizes.
    samples_per_chunk = int(chunk_size * sr)
    if samples_per_chunk <= 0:
        raise ValueError(f"chunk_size={chunk_size!r} yields zero samples per chunk")

    # Stepping by samples_per_chunk partitions the signal in one pass and
    # naturally includes a shorter tail chunk when the length is not an
    # exact multiple; empty audio yields an empty list.
    audio_chunks = [
        audio[start:start + samples_per_chunk]
        for start in range(0, len(audio), samples_per_chunk)
    ]

    return audio_chunks, sr

# Function to transcribe the audio in chunks using Whisper
def transcribe_audio_in_chunks(audio_file):
    """Transcribe an audio file chunk-by-chunk with Whisper.

    Parameters
    ----------
    audio_file : str or None
        Path to the uploaded audio file; Gradio passes None when the
        user submits without a file.

    Returns
    -------
    str
        One "Chunk N: <text>" line per 5-second chunk, or an error
        message when no file was supplied.
    """
    # Guard clause: nothing to transcribe.
    if audio_file is None:
        return "No audio file provided."

    # Split the recording into 5-second pieces at Whisper's sample rate.
    chunks, sr = chunk_audio(audio_file, chunk_size=5)

    # Collect per-chunk lines and join once at the end — repeated string
    # `+=` in a loop is quadratic in the number of chunks.
    lines = []
    for i, chunk in enumerate(chunks, start=1):
        # Whisper requires float32 PCM. librosa normally returns float32
        # already, so this is usually a cheap no-op, but it guards against
        # float64 arrays from other code paths (the old bare np.array()
        # call preserved whatever dtype came in).
        samples = np.asarray(chunk, dtype=np.float32)

        result = model.transcribe(samples)
        lines.append(f"Chunk {i}: {result['text']}\n")

    return "".join(lines)

# Wire the chunked Whisper transcriber into a simple Gradio UI: one
# audio-upload input (delivered as a file path) and a plain-text output.
_ui_settings = {
    "fn": transcribe_audio_in_chunks,
    "inputs": gr.Audio(type="filepath"),
    "outputs": "text",
    "title": "Whisper Audio Transcription with Chunking",
    "description": "Upload an audio file, and Whisper will transcribe it in real-time as chunks.",
}
iface = gr.Interface(**_ui_settings)

# Start the local web server (pass share=True to get a public link on Colab).
iface.launch()