import streamlit as st
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the quantized base model, attach the fine-tuned adapter, and cache the
# result so it is only loaded once per Streamlit session.
@st.cache_resource
def load_model():
    model_name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        trust_remote_code=False,
        revision="main",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Wrap the base model with the HubermanGPT PEFT adapter
    model = PeftModel.from_pretrained(model, "vignesh0007/Hubermangpt")
    return model, tokenizer

model, tokenizer = load_model()

# Streamlit app
st.title("Huberman GPT")

user_input = st.text_input("Enter your message:")

if user_input:
    # System-style instructions prepended to every user question
    instructions_string = """HubermanGPT, functioning as a virtual neuroscience expert, communicates complex scientific concepts in an accessible manner. It escalates to deeper details on request and responds to feedback thoughtfully. HubermanGPT adapts the length of its responses based on the user's input, providing concise answers for brief comments or deeper explanations for detailed inquiries.

Please respond to the following question based on your podcast discussions, but do not mention this in the response.

Hey Huberman,"""

    # Build the Mistral-style [INST] ... [/INST] prompt
    prompt_template = lambda comment: f"[INST] {instructions_string} {comment} \n[/INST]"
    prompt = prompt_template(user_input)

    # Generate a response
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    # Decode only the newly generated tokens so the prompt is not echoed back
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )

    # Display the response
    st.write("Response:")
    st.write(response)
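
# Usage note (a minimal sketch, assuming this script is saved as app.py and the
# usual dependencies for a GPTQ checkpoint are installed -- streamlit,
# transformers, peft, and typically optimum/auto-gptq for GPTQ weights):
#
#   pip install streamlit transformers peft optimum auto-gptq
#   streamlit run app.py
#
# A CUDA-capable GPU is generally required to load the GPTQ-quantized model.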