# hubermangpt/app.py
import streamlit as st
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
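
# NOTE: loading a GPTQ-quantized checkpoint through transformers also requires
# the optimum and auto-gptq packages (an assumption based on the usual GPTQ
# setup; check the model card on the Hub if loading fails).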
# Load the model and tokenizer
@st.cache_resource
def load_model():
    model_name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
    # Load the GPTQ-quantized base model; device_map="auto" places it on a GPU
    # when one is available.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        trust_remote_code=False,
        revision="main",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Attach the fine-tuned LoRA adapter on top of the base model.
    model = PeftModel.from_pretrained(model, "vignesh0007/Hubermangpt")
    return model, tokenizer
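
# st.cache_resource caches the return value across Streamlit reruns, so the
# weights are loaded only once per server process rather than on every
# interaction.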
model, tokenizer = load_model()

# Streamlit app
st.title("Huberman GPT")
user_input = st.text_input("Enter your message:")
if user_input:
    # System-style instructions prepended to every user question.
    instructions_string = """HubermanGPT, functioning as a virtual neuroscience expert, communicates complex scientific concepts in an accessible manner. It escalates to deeper details on request and responds to feedback thoughtfully.
HubermanGPT adapts the length of its responses based on the user's input, providing concise answers for brief comments or deeper explanations for detailed inquiries.
Please respond to the following question based on your podcast discussions, but do not mention this in the response.
Hey Huberman,"""

    # Wrap the instructions and the user's question in Mistral's [INST] chat format.
    prompt_template = lambda comment: f"[INST] {instructions_string} {comment} \n[/INST]"
    prompt = prompt_template(user_input)

    # Generate a response
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    # Decode only the newly generated tokens so the prompt is not echoed back.
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )

    # Display the response
    st.write("Response:")
    st.write(response)
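
# To run locally (assuming the dependencies above are installed):
#   streamlit run app.py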