File size: 1,926 Bytes
a3af99d 6982029 93396ac cea2d18 6982029 93396ac a3af99d 93396ac a3af99d 6982029 a3af99d cea2d18 a3af99d 6982029 a3af99d 93396ac 6982029 93396ac 6982029 a3af99d 6982029 a3af99d 6982029 93396ac a3af99d cea2d18 a3af99d cea2d18 a3af99d cea2d18 a3af99d cea2d18 a3af99d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
from transformers import AutoTokenizer, TextStreamer
from unsloth import FastLanguageModel
import torch
# Load the fine-tuned model together with its tokenizer.
model_name = "Rafay17/Llama3.2_1b_customModle2"  # Hugging Face repo id of the model

# FastLanguageModel.from_pretrained returns a (model, tokenizer) pair, so both
# are unpacked here. Binding the whole tuple to `model` would break the
# for_inference() call below and every later model.generate() call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,  # Adjust as needed
    dtype=torch.float16,  # A torch dtype (or None for auto-detection), not a string
    load_in_4bit=True,  # Adjust based on your needs
)

# Call this immediately after loading the model, before any generation.
FastLanguageModel.for_inference(model)
# Function to generate a response
def generate_response(input_text, max_new_tokens=100):
    """Stream the model's reply to ``input_text`` to standard output.

    The text is wrapped in the ``User Input: ... / Response:`` prompt,
    tokenized, and handed to ``model.generate`` together with a
    ``TextStreamer`` that prints the generated text as it is produced.
    Nothing is returned; the reply is only printed.

    Args:
        input_text: The user's message to answer.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Defaults to 100, the value that was previously hard-coded.
    """
    # Prepare the labeled prompt for the model
    labeled_prompt = f"User Input: {input_text}\nResponse:"

    # Tokenize as a batch of one and move the tensors to the device the model
    # lives on, instead of assuming a fixed "cuda" device.
    inputs = tokenizer(
        [labeled_prompt],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,  # Ensure this matches your model's max length
    ).to(model.device)

    # skip_prompt keeps the echoed prompt out of the output;
    # skip_special_tokens is forwarded to tokenizer.decode so end-of-sequence
    # markers are not printed to the user.
    text_streamer = TextStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # Generate the response
    with torch.no_grad():  # Disable gradient calculation for inference
        model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            streamer=text_streamer,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )
# Function to take user input and generate output
def user_interaction():
    """Run the interactive chat loop until the user asks to stop.

    Reads lines from standard input and passes each non-empty message to
    ``generate_response``. The loop ends when the user types ``exit``
    (case-insensitive, surrounding whitespace ignored) or when input ends
    (EOF / Ctrl-C at the prompt).
    """
    print("Welcome to the Chatbot! Type 'exit' to quit.")
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave cleanly rather than
            # ending the session with a traceback.
            print()
            print("Exiting the chatbot. Goodbye!")
            break

        # Ignore stray whitespace so inputs like "exit " are still recognised.
        message = user_input.strip()
        if not message:
            # Nothing was typed; prompt again instead of querying the model.
            continue
        if message.lower() == 'exit':
            print("Exiting the chatbot. Goodbye!")
            break

        print("Chatbot is generating a response...")
        generate_response(message)
# Start the interactive chat loop. This call sits at module level, so it runs
# as soon as the file is executed, after the model has been loaded above.
user_interaction()
|