import gradio as gr
from llama_cpp import Llama
import whisper
from gtts import gTTS
import tempfile
from huggingface_hub import hf_hub_download

# ----- Initialization -----

# Download the quantized GGUF weights from the Hugging Face Hub.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q2_K.gguf",
)

# Initialize the LLAMA model. Update model_path to point to your model file.
llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores
    n_batch=512,      # Should be between 1 and n_ctx; consider your GPU's VRAM.
    n_gpu_layers=43,  # Change this value based on your model and your GPU VRAM pool.
    n_ctx=4096,       # Context window
)

# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")

# ----- Helper Functions -----

def transcribe_audio(audio_file):
    """Transcribes the provided audio file using Whisper."""
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
    return result["text"]

def generate_response(prompt, max_tokens=150, temperature=0.7):
    """Uses llama-cpp to generate a response for the given prompt."""
    # Call the LLAMA model. The output is a dict with a "choices" list.
    # echo=False keeps the prompt out of the returned text, and stopping on
    # "User:" prevents the model from writing the next user turn itself.
    output = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        echo=False,
        stop=["User:"],
    )
    response = output["choices"][0]["text"]
    return response.strip()

def text_to_speech(text):
    """Converts text to speech using gTTS and returns the filepath to the saved audio."""
    tts = gTTS(text=text, lang="en")
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tmp_file.close()  # Release the handle so gTTS can write to the path (required on Windows).
    tts.save(tmp_file.name)
    return tmp_file.name

def voice_chat(audio, text, history, max_tokens, temperature):
    """
    Handles a single turn of the conversation:
      - If an audio file is provided and no text message, transcribes it.
      - Builds a conversation prompt from the chat history.
      - Generates a response from LLAMA.
      - Converts the response to speech.
    Returns the updated chat history, the response text, the response audio
    filepath, and the updated state.
    """
    # Use the transcribed audio if the text box is empty.
    if audio is not None and (text is None or text.strip() == ""):
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""

    # Build the conversation prompt (history is a list of (user, assistant) tuples).
    prompt = ""
    if history:
        for user_turn, bot_turn in history:
            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
    prompt += f"User: {user_input}\nAssistant: "

    # Generate the response using llama-cpp.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)

    # Convert the response to speech audio.
    audio_response = text_to_speech(response_text)

    # Append this turn to the conversation history.
    new_history = history.copy() if history else []
    new_history.append((user_input, response_text))

    # Return four outputs: update the Chatbot display, show the assistant text,
    # play the audio, and update the state.
    return new_history, response_text, audio_response, new_history
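# For reference, the prompt that voice_chat (above) assembles is a plain
# completion transcript. With a hypothetical two-turn history it looks like
# this (the conversation is invented purely for illustration):
#
#   User: What is llama.cpp?
#   Assistant: A C/C++ inference engine for LLaMA-family models.
#   User: Does it run on a CPU?
#   Assistant:
#
# The base GGUF model is a plain text-completion model rather than a
# chat-tuned one, so this "User:/Assistant:" scaffold together with the
# stop=["User:"] sequence in generate_response is what keeps turns separated.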
# ----- Gradio Interface -----

with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA‑CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: audio input and/or text input.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Display outputs: chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    # Gradio State to hold the conversation history.
    state = gr.State([])

    # On clicking the "Send" button, run voice_chat with the current inputs.
    send_btn.click(
        fn=voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state],
    )

# Launch the app.
demo.launch()
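# ----- Notes -----
# A minimal sketch of the dependencies this script assumes (common PyPI
# package names; pin versions as needed):
#
#   pip install gradio llama-cpp-python openai-whisper gTTS huggingface_hub
#
# n_gpu_layers only takes effect if llama-cpp-python was built with GPU
# support (e.g. a CUDA/cuBLAS or Metal build); on a CPU-only wheel, set
# n_gpu_layers=0. Whisper additionally needs ffmpeg installed on the system
# to decode the recorded audio. Recent Gradio releases deprecate the
# (user, assistant) tuple format that gr.Chatbot consumes here in favor of
# type="messages".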