"""Voice chatbot: Whisper speech-to-text -> LLAMA-CPP chat -> gTTS speech."""

import os
import tempfile

import gradio as gr
import whisper
from gtts import gTTS
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ----- Initialization -----

# Download the quantized LLAMA-2 13B chat model from the Hugging Face Hub
# (cached locally by huggingface_hub after the first run).
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Initialize the LLAMA model.
llm = Llama(
    model_path=model_path,
    n_threads=2,  # CPU cores
    n_batch=512,
    n_gpu_layers=43,
    n_ctx=4096,
)

# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")

# ----- Helper Functions -----


def transcribe_audio(audio_file):
    """Transcribe the audio file at *audio_file* with Whisper.

    Returns the transcribed text, or "" when no file was provided.
    """
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
    return result["text"]


def generate_response(prompt, max_tokens=150, temperature=0.7):
    """Generate a completion for *prompt* with LLAMA-CPP.

    Generation stops at the "User:" marker so the model does not start
    writing the user's next turn. Returns the stripped response text.
    """
    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
    response = output["choices"][0]["text"]
    return response.strip()


def text_to_speech(text):
    """Convert *text* to speech with gTTS and return the saved MP3's filepath.

    Uses mkstemp and closes the descriptor before writing: the previous
    NamedTemporaryFile(delete=False) approach leaked an open handle and
    fails on Windows, where gTTS cannot reopen a file that is still open.
    """
    tts = gTTS(text=text, lang="en")
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # release our handle; gTTS reopens the path to write
    tts.save(path)
    return path


def voice_chat(audio, text, history, max_tokens, temperature):
    """Handle a single turn of the conversation.

    - If an audio file is provided and no text message, transcribe it.
    - Builds a prompt using only the current user input with additional
      instructions (history is deliberately NOT fed back into the prompt).
    - Generates a response from LLAMA.
    - Converts the assistant's response to speech.

    Returns:
        - A new history containing only the current turn.
        - The assistant's response text.
        - The assistant's response audio filepath (None if nothing to say).
        - Updated state (new history).
    """
    # Use the transcribed audio if text is empty.
    if audio is not None and (text is None or text.strip() == ""):
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""

    # Additional system instructions for improved behavior.
    system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
                     "Provide accurate, factual, and polite responses. "
                     "Answer the user's question directly without unnecessary commentary.")

    # Build prompt using the system instructions plus the current user input.
    prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "

    # Generate response using LLAMA-CPP.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)

    # Convert only the assistant's response to speech. gTTS raises on empty
    # input, so skip synthesis when the model produced nothing.
    audio_response = text_to_speech(response_text) if response_text else None

    # Reset conversation history to only include the current turn.
    new_history = [(user_input, response_text)]
    return new_history, response_text, audio_response, new_history


# ----- Gradio Interface -----

with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA‑CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: Audio input and/or text input.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Display outputs: Chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    # Gradio State to hold the conversation history.
    state = gr.State([])

    # On clicking the "Send" button, run voice_chat directly (the previous
    # run_voice_chat wrapper only forwarded its arguments unchanged).
    send_btn.click(
        fn=voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state],
    )

# Launch the app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()