import torch
import spaces
import gradio as gr
import pandas as pd
from threading import Thread
import re
import time
import tempfile
import os

from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import SentenceTransformersTokenTextSplitter
from PIL import Image

HF_TOKEN = os.environ["Inference_Calls"]
print(HF_TOKEN)

# from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration, TextIteratorStreamer
# processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
# model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)

"""
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    token=HF_TOKEN,
).to("cuda:0")

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]
"""

from huggingface_hub import InferenceClient

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
client = InferenceClient(model=model_id, token=HF_TOKEN)  # pass the token value, not the literal string "HF_TOKEN"
print("Client object created!")

embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

ASR_MODEL_NAME = "openai/whisper-large-v3"
ASR_BATCH_SIZE = 8
ASR_CHUNK_LENGTH_S = 30
TEMP_FILE_LIMIT_MB = 1024  # 2048

"""
For more information on `huggingface_hub` Inference API support, please check the docs:
https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

device = 0 if torch.cuda.is_available() else "cpu"

asr_pl = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL_NAME,
    chunk_length_s=ASR_CHUNK_LENGTH_S,
    device=device,
)

application_title = "Enlight Innovations Limited -- Demo"
application_description = "This demo is designed to illustrate our basic ideas and feasibility in implementation."


# Chatbot Interface functions
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # messages = [{"role": "system", "content": system_message}]
    # for val in history:
    #     if val[0]:
    #         messages.append({"role": "user", "content": val[0]})
    #     if val[1]:
    #         messages.append({"role": "assistant", "content": val[1]})
    # messages.append({"role": "user", "content": message})

    messages = [
        {
            "role": "user",
            "content": "What is Python Programming?",
        },
    ]
    print(messages)

    response = ""

    for chunk in client.chat.completions.create(  # client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final streamed chunk may carry no content
            response += token
        yield response


# Transcribe Interface functions
@spaces.GPU
def transcribe(asr_inputs, task):
    #print("Type: " + str(type(asr_inputs)))

    if asr_inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    text = asr_pl(asr_inputs, batch_size=ASR_BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text.strip()


# Profile Interface functions
def load_profiles():
    try:
        return pd.read_csv("temp_profiles.csv", index_col=False)
    except FileNotFoundError:
        return pd.DataFrame()


def save_profile(profile_data):
    df = load_profiles()
    # DataFrame.append was removed in pandas 2.x; concatenate the new row instead
    df = pd.concat([df, pd.DataFrame([profile_data])], ignore_index=True)
    df.to_csv("temp_profiles.csv", index=False)


def lookup_profile(assessment_id_input):
    df = load_profiles()
    print(df)
    print(assessment_id_input)
    print(type(assessment_id_input))

    assessment_id = assessment_id_input.strip()
    if not assessment_id:
        #state.update("Please enter an Assessment ID", color="red")
        return "No Assessment Object/Session ID provided!"

    # Guard against an empty profile store (e.g. temp_profiles.csv not yet created)
    if df.empty or "Assessment_ID" not in df.columns:
        #state.update("No profiles found for this ID", color="red")
        return "No matched profile found!"

    result = df[df.Assessment_ID == assessment_id]
    if not result.empty:
        if isinstance(result, pd.DataFrame):
            return result.to_html()
        elif isinstance(result, pd.Series):
            return result.to_frame().to_html()
    else:
        #state.update("No profiles found for this ID", color="red")
        return "No matched profile found!"
    #profile_preview.update(value=result.to_markdown(index=False))
    #state.update("Profile(s) found!", color="green")


"""Gradio User Interface"""

#audio_input = gr.Audio(sources="upload", type="filepath", label="Audio: from file")
#gr.Audio(sources="microphone", type="filepath", label="Audio: from microphone")
#audio_input_choice = gr.Radio(["audio file", "microphone"], label="Audio Input Source", value="audio file")

# Profile Interface components
with gr.Blocks() as profile_interface:
    # Profile Lookup Section
    with gr.Column():
        assessment_id_input = gr.Textbox(
            label="Assessment Object/Session ID",
            placeholder="Enter Assessment Object/Session ID here...",
            #required=True
        )
        with gr.Row():
            lookup_btn = gr.Button("Lookup Profile", variant="primary")
            clear_btn = gr.Button("Clear Results", variant="secondary")
        #state = gr.State(elem_classes="status-container")
        profile_preview = gr.Markdown(label="Profile Results", value="")

# Transcribe Interface components
audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio Input Source")
task_input_choice = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
task_output = gr.Textbox(label="Transcribed Output")

# Transcribe Interface
transcribe_interface = gr.Interface(
    fn=transcribe,
    inputs=[
        audio_input,
        #audio_input_choice,
        task_input_choice,
    ],
    outputs=[
        task_output,
        #"text",
    ],
    title=application_title,
    description=application_description,
    allow_flagging="never",
)

# ChatInterface components
chatbot_main = gr.Chatbot(label="Extraction Output")
chatbot_main_input = gr.MultimodalTextbox({"text": "Choose the referred material(s) and ask your question.", "files": []})
chatbot_sys_output = gr.Textbox(value="You are a friendly Chatbot.", label="System Message")
chatbot_max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max. New Tokens")
chatbot_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature")
chatbot_top_p = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
chat_interface = gr.ChatInterface(
    respond,
    multimodal=True,
    title=application_title,
    description=application_description,
    chatbot=chatbot_main,
    textbox=chatbot_main_input,
    additional_inputs=[
        chatbot_sys_output,
        chatbot_max_tokens,
        chatbot_temperature,
        chatbot_top_p,
    ],
)

with gr.Blocks() as demo:
    gr.TabbedInterface(
        [profile_interface, transcribe_interface, chat_interface],
        ["Step 0: Profile", "Step 1: Transcribe", "Step 2: Extract"],
    )

    """
    def clear_audio_input():
        return None
    """

    # Load existing profiles to Step 0: Profile tab page on startup

    # Event Bindings
    lookup_btn.click(lookup_profile, assessment_id_input, profile_preview)
    clear_btn.click(lambda: "", outputs=profile_preview)  #, state.update("", color="")
    #assessment_id_input.change(lambda: state.update("", color=""), None, None)

    def update_task_input(task_input_choice):
        if task_input_choice == "transcribe":
            return gr.Textbox(label="Transcribed Output")  #Audio(sources="upload", label="Audio: from file")
        elif task_input_choice == "translate":
            return gr.Textbox(label="Translated Output")  #Audio(sources="microphone", label="Audio: from microphone")

    #task_input_choice.input(fn=clear_audio_input, outputs=audio_input).then(fn=update_audio_input,
    task_input_choice.input(fn=update_task_input,
                            inputs=task_input_choice,
                            outputs=task_output,
    )

    def update_chatbot_main_input(updated_text):
        return {"text": updated_text, "files": []}

    task_output.change(fn=update_chatbot_main_input,
                       inputs=task_output,
                       outputs=chatbot_main_input,
    )


if __name__ == "__main__":
    demo.queue().launch()  #demo.launch()