import os
import tempfile

import streamlit as st
import whisper
from TTS.api import TTS
from groq import Groq
from streamlit_option_menu import option_menu

# Read the Groq API key from the environment instead of hardcoding it in
# source control (the original embedded a literal key, which leaks the secret).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")


# Cache the models across Streamlit reruns: the script re-executes on every
# widget interaction, so reloading XTTS or Whisper each time would be
# prohibitively slow.
@st.cache_resource
def load_tts():
    return TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)


@st.cache_resource
def load_whisper(model_size="base"):
    return whisper.load_model(model_size)


tts = load_tts()


def get_llm_response(user_input):
    """Send the user's query to Groq and return a short text answer."""
    client = Groq(api_key=GROQ_API_KEY)
    prompt = ("IMPORTANT: You are an AI assistant that MUST provide responses "
              "in 25 words or less. NO EXCEPTIONS...")
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_input},
        ],
        model="llama3-8b-8192",
        temperature=0.5,
        top_p=1,
        stop=None,
        stream=False,
    )
    return chat_completion.choices[0].message.content


def transcribe_audio(audio_path, model_size="base"):
    """Transcribe an audio file to text with Whisper."""
    model = load_whisper(model_size)
    result = model.transcribe(audio_path)
    return result["text"]


def generate_speech(text, output_file, speaker_wav, language="hi"):
    """Synthesize `text` in the voice of `speaker_wav` using XTTS."""
    tts.tts_to_file(text=text, file_path=output_file,
                    speaker_wav=speaker_wav, language=language)


def save_upload_to_tempfile(uploaded_file):
    """Persist a Streamlit upload to disk, keeping its original extension."""
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.read())
        return tmp.name


# UI Design
st.title("🗣️ VocalAI - AI-Powered Voice Cloning & Chatbot")

selected_page = option_menu(
    menu_title=None,
    options=["Text-to-Speech", "Voice-Cloned Chatbot"],
    icons=["mic", "chat-dots"],
    menu_icon="cast",
    default_index=0,
    orientation="horizontal",
)

# Sidebar - Reference Speaker Audio Upload
st.sidebar.header("Upload Reference Audio")
ref_audio = st.sidebar.file_uploader(
    "Upload a speaker audio file (WAV/OGG/MP3)", type=["wav", "ogg", "mp3"]
)

if selected_page == "Text-to-Speech":
    st.header("🔊 Text-to-Speech (TTS)")
    text = st.text_area("Enter text to synthesize:", "Hello, this is a cloned voice test.")

    if st.button("Generate Voice"):
        if ref_audio is None:
            st.warning("⚠️ Please upload a reference speaker audio file first!")
        else:
            speaker_wav_path = save_upload_to_tempfile(ref_audio)
            output_path = "cloned_output.wav"
            generate_speech(text, output_path, speaker_wav_path, language="en")

            st.audio(output_path, format="audio/wav")
            with open(output_path, "rb") as f:
                st.download_button("Download Cloned Voice", f,
                                   file_name="cloned_voice.wav", mime="audio/wav")
            os.unlink(speaker_wav_path)

elif selected_page == "Voice-Cloned Chatbot":
    st.header("💬 AI Chatbot with Voice Cloning")
    user_query = st.text_area("Enter your query:", "Hello, explain AI briefly.")
    uploaded_voice = st.file_uploader(
        "Or upload an audio query (WAV/OGG/MP3)", type=["wav", "ogg", "mp3"]
    )

    # An uploaded audio query overrides the typed one: transcribe it first.
    if uploaded_voice is not None:
        audio_path = save_upload_to_tempfile(uploaded_voice)
        user_query = transcribe_audio(audio_path)
        os.unlink(audio_path)
        st.write("**Transcribed Query:**", user_query)

    if st.button("Generate Response"):
        if ref_audio is None:
            st.warning("⚠️ Please upload a reference speaker audio file first!")
        else:
            speaker_wav_path = save_upload_to_tempfile(ref_audio)
            response = get_llm_response(user_query)

            output_audio_path = "cloned_chat_response.wav"
            generate_speech(response, output_audio_path, speaker_wav_path, language="en")

            st.audio(output_audio_path, format="audio/wav")
            with open(output_audio_path, "rb") as f:
                st.download_button("Download Response Audio", f,
                                   file_name="cloned_chat_response.wav", mime="audio/wav")
            os.unlink(speaker_wav_path)