from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
# Load Llama 3.2 model
model_name = "meta-llama/Llama-3.2-3B-Instruct" # Replace with the exact model path
tokenizer = AutoTokenizer.from_pretrained(model_name)
#model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None, torch_dtype=torch.float32)
# Helper function to process long contexts
MAX_TOKENS = 100000 # Replace with the max token limit of the Llama model
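# The comment above announces a long-context helper; a minimal sketch is given below.
# Assumption: plain tail truncation by token count is enough here. The function name is
# illustrative and it is not wired into the rest of the script.
def truncate_to_max_tokens(text, max_tokens=MAX_TOKENS):
    tokens = tokenizer.encode(text)
    if len(tokens) <= max_tokens:
        return text
    # Keep the most recent max_tokens tokens so the end of the context survives
    return tokenizer.decode(tokens[-max_tokens:], skip_special_tokens=True)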
#########
###
#########
import faiss
import pandas as pd
from sentence_transformers import SentenceTransformer
# torch, transformers (AutoTokenizer, AutoModelForCausalLM) and gradio are already imported above
# Load Sentence Transformer model for embeddings
embedder = SentenceTransformer('distiluse-base-multilingual-cased') # Suitable for German text
########
###
###
#####
# Load the CSV data
url = 'https://www.bofrost.de/datafeed/DE/products.csv'
data = pd.read_csv(url, sep='|')
# List of columns to keep
columns_to_keep = [
'ID', 'Name', 'Description', 'Price',
'ProductCategory', 'Grammage',
'BasePriceText', 'Rating', 'RatingCount',
'Ingredients', 'CreationDate', 'Keywords', 'Brand'
]
# Filter the DataFrame down to the kept columns (copy to avoid SettingWithCopyWarning)
data_cleaned = data[columns_to_keep].copy()
# Remove unwanted characters from the 'Description' column
data_cleaned['Description'] = data_cleaned['Description'].str.replace(r'[^\w\s.,;:\'"/?!€$%&()\[\]{}<>|=+\\-]', ' ', regex=True)
# Combine relevant text columns for embedding
data_cleaned['combined_text'] = data_cleaned.apply(lambda row: ' '.join([str(row[col]) for col in ['Name', 'Description', 'Keywords'] if pd.notnull(row[col])]), axis=1)
######
##
#####
# Generate embeddings for the combined text
embeddings = embedder.encode(data_cleaned['combined_text'].tolist(), convert_to_tensor=True)
# Convert embeddings to numpy array
embeddings = embeddings.cpu().detach().numpy()
# Initialize FAISS index
d = embeddings.shape[1] # Dimension of embeddings
faiss_index = faiss.IndexFlatL2(d)
# Add embeddings to the index
faiss_index.add(embeddings)
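# Optional sketch: persist the index so embeddings are not recomputed on every restart.
# "products.index" is an assumed local path, not part of the original script.
# faiss.write_index(faiss_index, "products.index")
# faiss_index = faiss.read_index("products.index")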
#######
##
######
def search_products(query, top_k=7):
    # Generate embedding for the query
    query_embedding = embedder.encode([query], convert_to_tensor=True).cpu().detach().numpy()
    # Search the FAISS index for the nearest neighbours
    distances, indices = faiss_index.search(query_embedding, top_k)
    # Retrieve the corresponding product rows
    results = data_cleaned.iloc[indices[0]].to_dict(orient='records')
    return results
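# Quick sanity check of the retrieval step (hypothetical query; uncomment to try):
# for hit in search_products("Pizza", top_k=3):
#     print(hit['ID'], hit['Name'], hit['Price'])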
# Update the prompt construction to include the FAISS search results
def construct_system_prompt(context):
    prompt = f"You are a friendly bot specializing in Bofrost products. Return comprehensive German answers. Always add product IDs. Use the following product descriptions:\n\n{context}\n\n"
    return prompt
# Helper function to construct the prompt
def construct_prompt(user_input, context, chat_history, max_history_turns=1):
    system_message = construct_system_prompt(context)
    prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
    # Limit history to the last max_history_turns turns
    for user_msg, assistant_msg in chat_history[-max_history_turns:]:
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
        prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
    prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    print("-------------------------")
    print(prompt)
    return prompt
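# Hedged alternative sketch: newer transformers releases can build the same Llama 3
# chat layout from the tokenizer's chat template instead of hand-written header tokens.
# messages = [{"role": "system", "content": system_message},
#             {"role": "user", "content": user_input}]
# prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)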
def chat_with_model(user_input, chat_history=None):
    # Avoid a shared mutable default argument
    if chat_history is None:
        chat_history = []
    # Search for relevant products
    search_results = search_products(user_input)
    # Build the context block from the search results
    if search_results:
        context = "Product Context:\n"
        for product in search_results:
            context += f"Produkt ID: {product['ID']}\n"
            context += f"Name: {product['Name']}\n"
            context += f"Beschreibung: {product['Description']}\n"
            context += f"Preis: {product['Price']}€\n"
            context += f"Bewertung: {product['Rating']} ({product['RatingCount']} Bewertungen)\n"
            context += f"Kategorie: {product['ProductCategory']}\n"
            context += f"Marke: {product['Brand']}\n"
            context += "---\n"
    else:
        context = "Das weiß ich nicht."
    print("context: ------------------------------------- \n" + context)
    # Pass both user_input and context to construct_prompt
    prompt = construct_prompt(user_input, context, chat_history)
    print("prompt: ------------------------------------- \n" + prompt)
    tokenizer.pad_token = tokenizer.eos_token
    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=4096).to("cpu")
    attention_mask = torch.ones_like(input_ids).to("cpu")
    outputs = model.generate(input_ids, attention_mask=attention_mask,
                             max_new_tokens=1200, do_sample=True,
                             top_k=50, temperature=0.7)
    # Decode only the newly generated tokens, not the prompt
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    print("response: ------------------------------------- \n" + response)
    # Store the user turn (not the raw product context) in the history
    chat_history.append((user_input, response))
    return response, chat_history
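# Example of calling the chat function directly, outside Gradio (hypothetical input):
# reply, history = chat_with_model("Welche Pizza empfehlt ihr?", [])
# print(reply)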
#####
###
###
# Gradio Interface
def gradio_interface(user_input, history):
    response, updated_history = chat_with_model(user_input, history)
    return response, updated_history
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 Llama Instruct Chat with FAISS Integration")
    with gr.Row():
        user_input = gr.Textbox(label="Your Message", lines=2, placeholder="Type your message here...")
        submit_btn = gr.Button("Send")
    chat_history = gr.State([])
    chat_display = gr.Textbox(label="Chat Response", lines=10, placeholder="Chat history will appear here...", interactive=False)
    submit_btn.click(gradio_interface, inputs=[user_input, chat_history], outputs=[chat_display, chat_history])
demo.launch(debug=True)