import os
import time

import gradio as gr
import psutil
from huggingface_hub import InferenceClient
from transformers import pipeline

# import torch
# import numpy as np
# Ensure CUDA is available and set the device accordingly
# device = 0 if torch.cuda.is_available() else -1

model_id = "openai/whisper-small"
client = InferenceClient(model_id, token=os.getenv("HF_TOKEN"))
pipe = pipeline("automatic-speech-recognition", model=model_id)  # , device=device)

print("The server is running!")


def transcribe(inputs, use_api):
    start = time.time()
    status_prefix = ""
    memory_before = psutil.Process(os.getpid()).memory_info().rss

    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    try:
        if use_api:
            print("Using API for transcription...")
            status_prefix = "Using API it took: "
            # Use the InferenceClient (remote API) when the checkbox is checked
            res = client.automatic_speech_recognition(inputs).text
        else:
            print("Using local pipeline for transcription...")
            status_prefix = "Using local pipeline it took: "
            # Use the local pipeline when the checkbox is unchecked
            res = pipe(inputs, chunk_length_s=30)["text"]

        elapsed = time.time() - start

        # Measure memory after the transcription and compute the delta
        memory_after = psutil.Process(os.getpid()).memory_info().rss
        memory_used = memory_after - memory_before  # bytes
        memory_used_gb = round(memory_used / (1024 ** 3), 2)  # convert to GB
        total_memory_gb = round(psutil.virtual_memory().total / (1024 ** 3), 2)  # total RAM in GB
        # Percentage of total RAM consumed by this process during the call
        memory_used_percent = round((memory_used / psutil.virtual_memory().total) * 100, 2)

        return (
            res,
            f"{status_prefix}{round(elapsed, 2)} seconds",
            f"RAM used by code: {memory_used_gb} GB ({memory_used_percent}%), total RAM: {total_memory_gb} GB",
        )
    except Exception as e:
        return f"Error: {e}", None, None


demo = gr.Blocks()

mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Checkbox(label="Use API", value=False),
    ],
    outputs=[
        gr.Textbox(label="Transcribed Text", type="text"),
        gr.Textbox(label="Time taken", type="text"),
        gr.Textbox(label="RAM Utilization", type="text"),
    ],
    title="Welcome to QuickTranscribe",
    description="Transcribe long-form microphone or audio inputs with the click of a button!",
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Checkbox(label="Use API", value=False),  # Checkbox for API usage
    ],
    outputs=[
        gr.Textbox(label="Transcribed Text", type="text"),
        gr.Textbox(label="Time taken", type="text"),
        gr.Textbox(label="RAM Utilization", type="text"),
    ],
    title="Welcome to QuickTranscribe",
    description="Transcribe long-form microphone or audio inputs with the click of a button!",
    allow_flagging="never",
)

with demo:
    with gr.Row():
        # Group the microphone and file-based transcription interfaces as tabs
        tab = gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
        # with gr.Column():
        #     use_api_checkbox = gr.Checkbox(label="Use API", value=False)  # Checkbox outside the interfaces
        #     # time_taken = gr.Textbox(label="Time taken", type="text")  # Time taken outside the interfaces

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=5000)
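
# --- Usage sketch (an assumption, not part of the original app) --------------
# Once the server is up, the endpoints can also be queried programmatically
# with gradio_client. The URL, the audio path, and the api_name value below
# are assumptions: nested Interfaces are typically exposed as "/predict" and
# "/predict_1", but the actual names are listed under "Use via API" in the
# running app.
#
# from gradio_client import Client, handle_file
#
# api = Client("http://localhost:5000/")
# text, time_taken, ram = api.predict(
#     handle_file("sample.wav"),  # hypothetical local audio file
#     False,                      # use_api: False = run the local pipeline
#     api_name="/predict",        # microphone-tab endpoint (assumed)
# )
# print(text, time_taken, ram)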