import faiss
import pandas as pd
import torch
import gradio as gr
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Llama 3.2 instruct model
model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Replace with the exact model path
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Llama has no pad token by default
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

# Token budget for handling long contexts
MAX_TOKENS = 100000  # Replace with the max token limit of the Llama model
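
# Hedged sketch (assumption, not part of the original script): MAX_TOKENS is
# defined above but not applied anywhere in the original code. A minimal helper
# could trim an overlong prompt to that budget, keeping the most recent tokens.
# The name truncate_to_max_tokens and the keep-the-tail strategy are
# illustrative choices only.
def truncate_to_max_tokens(text, max_tokens=MAX_TOKENS):
    token_ids = tokenizer.encode(text)
    if len(token_ids) <= max_tokens:
        return text
    # Drop the oldest tokens so the latest turns stay inside the window.
    return tokenizer.decode(token_ids[-max_tokens:], skip_special_tokens=True)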


# --- Retrieval setup: sentence embeddings + FAISS index over the product feed ---

# Load Sentence Transformer model for embeddings
embedder = SentenceTransformer('distiluse-base-multilingual-cased')  # Suitable for German text

# --- Load and clean the Bofrost product feed (pipe-separated CSV) ---
url = 'https://www.bofrost.de/datafeed/DE/products.csv'
data = pd.read_csv(url, sep='|')

# List of columns to keep
columns_to_keep = [
    'ID', 'Name', 'Description', 'Price', 
    'ProductCategory', 'Grammage', 
    'BasePriceText', 'Rating', 'RatingCount',
    'Ingredients', 'CreationDate', 'Keywords', 'Brand'
]

# Filter the DataFrame
data_cleaned = data[columns_to_keep].copy()  # copy() avoids pandas SettingWithCopyWarning on later assignments

# Remove unwanted characters from the 'Description' column
data_cleaned['Description'] = data_cleaned['Description'].str.replace(r'[^\w\s.,;:\'"/?!€$%&()\[\]{}<>|=+\\-]', ' ', regex=True)

# Combine relevant text columns for embedding
data_cleaned['combined_text'] = data_cleaned.apply(lambda row: ' '.join([str(row[col]) for col in ['Name', 'Description', 'Keywords'] if pd.notnull(row[col])]), axis=1)

# --- Embed product texts and build the FAISS index ---

# Generate embeddings for the combined text
embeddings = embedder.encode(data_cleaned['combined_text'].tolist(), convert_to_tensor=True)

# Convert embeddings to numpy array
embeddings = embeddings.cpu().detach().numpy()

# Initialize FAISS index
d = embeddings.shape[1]  # Dimension of embeddings
faiss_index = faiss.IndexFlatL2(d)

# Add embeddings to the index
faiss_index.add(embeddings)
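
# Design note (assumption, not in the original code): IndexFlatL2 ranks results
# by Euclidean distance. For cosine similarity one could normalize the vectors
# and use an inner-product index instead, e.g.:
#   faiss.normalize_L2(embeddings)
#   faiss_index = faiss.IndexFlatIP(d)
#   faiss_index.add(embeddings)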

# --- Semantic product search ---
def search_products(query, top_k=7):
    # Generate embedding for the query
    query_embedding = embedder.encode([query], convert_to_tensor=True).cpu().detach().numpy()

    # Search FAISS index
    distances, indices = faiss_index.search(query_embedding, top_k)

    # Retrieve corresponding products
    results = data_cleaned.iloc[indices[0]].to_dict(orient='records')
    return results
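
# Example usage (illustrative only, not executed by the app):
#   hits = search_products("vegetarische Pizza", top_k=3)
#   for hit in hits:
#       print(hit['ID'], hit['Name'], hit['Price'])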



# Build the system prompt from the FAISS search results
def construct_system_prompt(context):
    prompt = f"You are a friendly bot specializing in Bofrost products. Return comprehensive German answers. Always add product IDs. Use the following product descriptions:\n\n{context}\n\n"
    return prompt

# Helper function to construct the prompt
def construct_prompt(user_input, context, chat_history, max_history_turns=1):
    system_message = construct_system_prompt(context)
    prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
    
    # Limit history to the last max_history_turns turns
    for user_msg, assistant_msg in chat_history[-max_history_turns:]:
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
        prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
    
    prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    print("-------------------------")
    print(prompt)
    return prompt

def chat_with_model(user_input, chat_history=[]):
    # Search for relevant products
    search_results = search_products(user_input)

    # Create context with search results
    if search_results:
        context = "Product Context:\n"
        for product in search_results:
            context += f"Produkt ID: {product['ID']}\n"
            context += f"Name: {product['Name']}\n"
            context += f"Beschreibung: {product['Description']}\n"
            context += f"Preis: {product['Price']}€\n"
            context += f"Bewertung: {product['Rating']} ({product['RatingCount']} Bewertungen)\n"
            context += f"Kategorie: {product['ProductCategory']}\n"
            context += f"Marke: {product['Brand']}\n"
            context += "---\n"
    else:
        context = "Das weiß ich nicht."  # Fallback context ("I don't know that.") when no products match
    print("context: ------------------------------------- \n"+context)
    # Build the full prompt from the user input, retrieved product context, and chat history
    prompt = construct_prompt(user_input, context, chat_history)
    print("prompt: ------------------------------------- \n"+prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=4096).to(model.device)
    attention_mask = torch.ones_like(input_ids)
    outputs = model.generate(input_ids, attention_mask=attention_mask,
                             max_new_tokens=1200, do_sample=True, 
                             top_k=50, temperature=0.7)
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    print("respone: ------------------------------------- \n"+response)
    chat_history.append((user_input, response))  # Store the user turn, not the product context, so the history matches the prompt format
    return response, chat_history
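
# Example usage outside the Gradio app (illustrative only):
#   reply, history = chat_with_model("Welche veganen Gerichte gibt es?", [])
#   print(reply)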

# --- Gradio interface ---
def gradio_interface(user_input, history):
    response, updated_history = chat_with_model(user_input, history)
    return response, updated_history

with gr.Blocks() as demo:
    gr.Markdown("# 🦙 Llama Instruct Chat with ChromaDB Integration")
    with gr.Row():
        user_input = gr.Textbox(label="Your Message", lines=2, placeholder="Type your message here...")
        submit_btn = gr.Button("Send")
    chat_history = gr.State([])
    chat_display = gr.Textbox(label="Chat Response", lines=10, placeholder="Chat history will appear here...", interactive=False)
    submit_btn.click(gradio_interface, inputs=[user_input, chat_history], outputs=[chat_display, chat_history])

demo.launch(debug=True)