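"""Simple terminal chatbot for the Rafay17/Llama3.2_1b_customModle2 model.

Loads the fine-tuned Llama 3.2 1B model with Unsloth and streams generated
responses to stdout in a read-eval-print loop.
"""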
import torch
from transformers import TextStreamer
from unsloth import FastLanguageModel

# Load the model and tokenizer. Unsloth's from_pretrained returns both,
# so no separate AutoTokenizer call is needed.
model_name = "Rafay17/Llama3.2_1b_customModle2"  # Your model name
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,   # Adjust as needed
    dtype=torch.float16,  # Adjust as needed
    load_in_4bit=True,    # Adjust based on your needs
)
FastLanguageModel.for_inference(model)  # Call this immediately after loading the model

def generate_response(input_text):
    """Generate a response for the given user input and stream it to stdout."""
    # Prepare the labeled prompt for the model
    labeled_prompt = f"User Input: {input_text}\nResponse:"

    # Tokenize the prompt and move it to the GPU (assumes a CUDA device is available)
    inputs = tokenizer(
        [labeled_prompt],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,  # Ensure this matches the model's max sequence length
    ).to("cuda")

    # Set up the text streamer to print tokens as they are generated, skipping the prompt
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the response
    with torch.no_grad():  # Disable gradient calculation for inference
        model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            streamer=text_streamer,
            max_new_tokens=100,  # Adjust this value as needed
            pad_token_id=tokenizer.eos_token_id,
        )

def user_interaction():
    """Run a simple chat loop until the user types 'exit'."""
    print("Welcome to the Chatbot! Type 'exit' to quit.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Exiting the chatbot. Goodbye!")
            break
        print("Chatbot is generating a response...")
        generate_response(user_input)


# Start the user interaction
if __name__ == "__main__":
    user_interaction()
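
# Example session (assumes a CUDA GPU; the generated text will vary by model):
#   $ python app.py
#   Welcome to the Chatbot! Type 'exit' to quit.
#   You: Hello!
#   Chatbot is generating a response...
#   ...
#   You: exit
#   Exiting the chatbot. Goodbye!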