import os

import gradio as gr
import torch
import whisper
from deep_translator import GoogleTranslator

# Use an NVIDIA GPU when available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Directory where transcripts are written.
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, "transcripts")


def check_directory(path):
    """Ensure the given directory exists."""
    os.makedirs(path, exist_ok=True)


check_directory(TRANSCRIPTS_FOLDER)


def live_transcribe_and_translate(audio_path, selected_language, model_type="base"):
    """
    Transcribe an uploaded audio file with Whisper and translate it into
    English when required. Despite its name, this function processes a
    complete recording rather than a live stream; see the streaming sketch
    at the bottom of this file.

    :param audio_path: Path to the uploaded audio file
    :param selected_language: Language code of the audio
    :param model_type: Whisper model size (default is 'base')
    :return: Translated segments, or a completion message
    """
    try:
        # Load the Whisper model chosen by the user.
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"

    # Whisper transcribes the whole file and returns a dict whose 'segments'
    # list carries start/end timestamps alongside the recognised text.
    result = model.transcribe(audio_path, language=selected_language,
                              fp16=(DEVICE == "cuda"))

    translator = GoogleTranslator(source="auto", target="en")
    translated_text = []
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, "live_transcript.txt")

    with open(transcript_file, "w", encoding="utf-8") as text_file:
        for segment in result["segments"]:
            start_time = segment["start"]
            end_time = segment["end"]
            text = segment["text"]
            text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")

            # Translate Dutch audio into English.
            if selected_language == "nl":
                text_en = translator.translate(text)
                translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

    return "\n".join(translated_text) if translated_text else "Transcription completed."


# Define the Gradio interface.
interface = gr.Interface(
    fn=live_transcribe_and_translate,
    inputs=[
        # 'filepath' hands Whisper a path it can decode directly via ffmpeg.
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
        gr.Dropdown(label="Select Model Type",
                    choices=["tiny", "base", "small", "medium", "large"],
                    value="base"),
    ],
    outputs="text",
    title="Live Transcription and Translation",
)


if __name__ == "__main__":
    # Launch the Gradio interface.
    interface.launch()
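
# ---------------------------------------------------------------------------
# Optional sketch: genuinely live transcription from the microphone.
#
# The interface above transcribes a finished upload. For actual streaming,
# Gradio can deliver microphone chunks to a callback as (sample_rate, data)
# tuples when the Audio input is created with streaming=True. The buffering
# strategy below (re-transcribing the growing buffer on every chunk) and the
# use of gr.State to carry the buffer between calls are illustrative
# assumptions, not part of the original interface; `sources=["microphone"]`
# follows the Gradio 4.x API.
# ---------------------------------------------------------------------------
import numpy as np

# Loaded lazily so importing this module stays cheap; 'base' is an assumed
# default that keeps per-chunk latency tolerable on most machines.
_stream_model = None


def stream_transcribe(buffer, new_chunk):
    """Accumulate microphone audio and re-transcribe the whole buffer."""
    global _stream_model
    if _stream_model is None:
        _stream_model = whisper.load_model("base", device=DEVICE)

    sample_rate, data = new_chunk
    # Convert int16 PCM to the float32 range Whisper expects. Resampling is
    # omitted for brevity; real code should resample when sample_rate != 16000.
    data = data.astype(np.float32) / 32768.0
    if data.ndim > 1:  # downmix stereo to mono
        data = data.mean(axis=1)

    buffer = data if buffer is None else np.concatenate([buffer, data])
    result = _stream_model.transcribe(buffer, fp16=(DEVICE == "cuda"))
    return buffer, result["text"]


# Wiring it up (commented out so the upload interface above remains the entry
# point; uncomment and launch this instead to try streaming):
# streaming_demo = gr.Interface(
#     fn=stream_transcribe,
#     inputs=[gr.State(), gr.Audio(sources=["microphone"], streaming=True)],
#     outputs=[gr.State(), gr.Textbox(label="Live Transcript")],
#     live=True,
# )
# streaming_demo.launch()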