# Import Unsloth before transformers so its runtime patches are applied first.
from unsloth import FastLanguageModel
from transformers import TextStreamer
import torch

model_name = "Rafay17/Llama3.2_1b_customModle2"

# FastLanguageModel.from_pretrained returns a (model, tokenizer) pair, so
# unpack both here rather than loading the tokenizer separately via AutoTokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,
    dtype=torch.float16,
    load_in_4bit=True,
)

# Switch the model into Unsloth's faster inference mode.
FastLanguageModel.for_inference(model)


def generate_response(input_text):
    # Wrap the raw user text in the same labeled format used during fine-tuning.
    labeled_prompt = f"User Input: {input_text}\nResponse:"

    inputs = tokenizer(
        [labeled_prompt],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to("cuda")

    # Stream tokens to stdout as they arrive; skip_prompt hides the echoed prompt.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    with torch.no_grad():
        model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            streamer=text_streamer,
            max_new_tokens=100,
            pad_token_id=tokenizer.eos_token_id,
        )
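
# Note: the "User Input: ... Response:" label above only works because it matches
# the format this checkpoint was fine-tuned on. If a checkpoint were instead
# trained on Llama 3.2's chat format, a minimal sketch (assuming the tokenizer
# ships a chat template) would build the prompt like this:
#
#     messages = [{"role": "user", "content": input_text}]
#     input_ids = tokenizer.apply_chat_template(
#         messages,
#         add_generation_prompt=True,  # append the assistant header
#         return_tensors="pt",
#     ).to("cuda")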


def user_interaction():
    print("Welcome to the Chatbot! Type 'exit' to quit.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Exiting the chatbot. Goodbye!")
            break
        print("Chatbot is generating a response...")
        generate_response(user_input)


user_interaction()
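
# Portability note: .to("cuda") hard-codes the device, and load_in_4bit=True
# typically requires a CUDA GPU anyway (it quantizes through bitsandbytes). If
# you drop 4-bit loading to run on other hardware, a sketch that resolves the
# device at runtime instead of assuming CUDA would be:
#
#     device = next(model.parameters()).device
#     inputs = tokenizer([labeled_prompt], return_tensors="pt").to(device)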