import streamlit as st
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the quantized base model, attach the fine-tuned adapter, and cache the
# result so it is only loaded once per Streamlit session.
@st.cache_resource
def load_model():
    model_name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        trust_remote_code=False,
        revision="main",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Wrap the base model with the HubermanGPT PEFT adapter
    model = PeftModel.from_pretrained(model, "vignesh0007/Hubermangpt")
    return model, tokenizer

model, tokenizer = load_model()

# Streamlit app
st.title("Huberman GPT")

user_input = st.text_input("Enter your message:")

if user_input:
    # System-style instructions prepended to every user question
    instructions_string = """HubermanGPT, functioning as a virtual neuroscience expert, communicates complex scientific concepts in an accessible manner. It escalates to deeper details on request and responds to feedback thoughtfully. HubermanGPT adapts the length of its responses based on the user's input, providing concise answers for brief comments or deeper explanations for detailed inquiries.

Please respond to the following question based on your podcast discussions, but do not mention this in the response.

Hey Huberman,"""

    # Build the Mistral-style [INST] ... [/INST] prompt
    prompt_template = lambda comment: f"[INST] {instructions_string} {comment} \n[/INST]"
    prompt = prompt_template(user_input)

    # Generate a response
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    # Decode only the newly generated tokens so the prompt is not echoed back
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )

    # Display the response
    st.write("Response:")
    st.write(response)
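
# Usage note (a minimal sketch, assuming this script is saved as app.py and the
# usual dependencies for a GPTQ checkpoint are installed -- streamlit,
# transformers, peft, and typically optimum/auto-gptq for GPTQ weights):
#
#   pip install streamlit transformers peft optimum auto-gptq
#   streamlit run app.py
#
# A CUDA-capable GPU is generally required to load the GPTQ-quantized model.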