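"""Interactive chat loop for a fine-tuned Llama 3.2 1B model, loaded via unsloth.

A minimal sketch: the script loads the model in 4-bit, then streams replies to
user input until the user types 'exit'. It assumes the unsloth, transformers,
and torch packages are installed and that a CUDA GPU is available.
"""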
from unsloth import FastLanguageModel  # Import unsloth before transformers so its patches apply
from transformers import TextStreamer
import torch
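
# An assumption of this script (not enforced by unsloth itself): 4-bit loading
# below expects a CUDA-capable GPU, so fail fast if one is not available.
if not torch.cuda.is_available():
    raise SystemExit("This script expects a CUDA GPU for 4-bit loading.")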

# Load the model and tokenizer (unsloth returns both as a tuple)
model_name = "Rafay17/Llama3.2_1b_customModle2"  # Your model name

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,   # Adjust as needed
    dtype=torch.float16,  # Adjust as needed (None lets unsloth auto-detect)
    load_in_4bit=True,    # Adjust based on your needs
)

FastLanguageModel.for_inference(model)  # Enable unsloth's optimized inference mode; call right after loading

# Function to generate a response
def generate_response(input_text):
    # Prepare the labeled prompt for the model
    labeled_prompt = f"User Input: {input_text}\nResponse:"

    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(
        [labeled_prompt],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,  # Ensure this matches the max_seq_length used above
    ).to(model.device)

    # Set up the text streamer to stream the generated response
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the response
    with torch.no_grad():  # Disable gradient calculation for inference
        model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            streamer=text_streamer,
            max_new_tokens=100,  # Adjust this value as needed
            pad_token_id=tokenizer.eos_token_id,
        )
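
# A non-streaming variant (a sketch under the same assumptions as above): it
# returns the reply as a string instead of streaming it, which is handy if you
# want to log or post-process the output. The function name is ours, not part
# of any library API.
def generate_response_text(input_text, max_new_tokens=100):
    labeled_prompt = f"User Input: {input_text}\nResponse:"
    inputs = tokenizer([labeled_prompt], return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = output_ids[0, inputs.input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)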

# Function to take user input and generate output
def user_interaction():
    print("Welcome to the Chatbot! Type 'exit' to quit.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Exiting the chatbot. Goodbye!")
            break
        print("Chatbot is generating a response...")
        generate_response(user_input)

# Start the user interaction when run as a script
if __name__ == "__main__":
    user_interaction()
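
# To try it out: run this file directly (e.g. `python chat_llama32.py`; the
# filename here is just an example) and type messages at the "You:" prompt.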