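"""Gradio chat demo: a Llama 3.2 Instruct bot for Bofrost product questions.

The script embeds the product feed with SentenceTransformers, retrieves matching
products via a FAISS index, and traces each chat turn with Langfuse.
"""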
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
from langfuse import Langfuse
from langfuse.decorators import observe, langfuse_context
import os
import faiss
import pandas as pd
from sentence_transformers import SentenceTransformer
import datetime
# Initialize Langfuse
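# NOTE: credentials are hardcoded for this demo; in a deployed Space they belong
# in environment secrets rather than in source code.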
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-9f2c32d2-266f-421d-9b87-51377f0a268c"
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-229e10c5-6210-4a4b-a432-0f17bc66e56c"
os.environ["LANGFUSE_HOST"] = "https://chris4k-langfuse-template-space.hf.space" # 🇪🇺 EU region
langfuse = Langfuse()
# Load the Llama model
model_name = "meta-llama/Llama-3.2-3B-Instruct" # Replace with the exact model path
tokenizer = AutoTokenizer.from_pretrained(model_name)
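# device_map=None keeps the model on the default device (CPU here); float32 is the
# safe dtype on CPU, at the cost of memory and speed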
model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None, torch_dtype=torch.float32)
# Load FAISS and Embeddings
embedder = SentenceTransformer('distiluse-base-multilingual-cased')
url = 'https://www.bofrost.de/datafeed/DE/products.csv'
data = pd.read_csv(url, sep='|')
# Clean and process the dataset
columns_to_keep = ['ID', 'Name', 'Description', 'Price', 'ProductCategory', 'Grammage', 'BasePriceText', 'Rating', 'RatingCount', 'Ingredients', 'CreationDate', 'Keywords', 'Brand']
data_cleaned = data[columns_to_keep].copy()  # copy the slice so later column assignments don't raise SettingWithCopyWarning
data_cleaned['Description'] = data_cleaned['Description'].str.replace(r'[^\w\s.,;:\'"/?!€$%&()\[\]{}<>|=+\\-]', ' ', regex=True)
data_cleaned['combined_text'] = data_cleaned.apply(lambda row: ' '.join([str(row[col]) for col in ['Name', 'Description', 'Keywords'] if pd.notnull(row[col])]), axis=1)
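# 'combined_text' is the document string that gets embedded for semantic search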
# Generate and add embeddings
embeddings = embedder.encode(data_cleaned['combined_text'].tolist(), convert_to_tensor=True).cpu().detach().numpy()
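# IndexFlatL2 does exact (brute-force) nearest-neighbour search; adequate for a
# catalogue of this size and needs no training step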
faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
faiss_index.add(embeddings)
# Helper function for searching products
def search_products(query, top_k=7):
    query_embedding = embedder.encode([query], convert_to_tensor=True).cpu().detach().numpy()
    distances, indices = faiss_index.search(query_embedding, top_k)
    return data_cleaned.iloc[indices[0]].to_dict(orient='records')
# Prompt construction functions
def construct_system_prompt(context):
return f"You are a friendly bot specializing in Bofrost products. Return comprehensive German answers. Always add product IDs. Use the following product descriptions:\n\n{context}\n\n"
def construct_prompt(user_input, context, chat_history, max_history_turns=1):
    system_message = construct_system_prompt(context)
    prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
    for user_msg, assistant_msg in chat_history[-max_history_turns:]:
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
        prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
    prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    return prompt
# Main function to interact with the model
@observe()
def chat_with_model(user_input, chat_history=None):
    # Avoid the mutable-default-argument pitfall: a shared list would leak chat state across calls
    if chat_history is None:
        chat_history = []
    # Start trace for the entire chat process
    trace = langfuse.trace(
        name="ai-chat-execution",
        user_id="user_12345",
        metadata={"email": "[email protected]"},
        tags=["chat", "product-query"],
        release="v1.0.0"
    )
    # Span for product search
    retrieval_span = trace.span(
        name="product-retrieval",
        metadata={"source": "faiss-index"},
        input={"query": user_input}
    )
    # Search for products
    search_results = search_products(user_input)
    if search_results:
        context = "Product Context:\n" + "\n".join(
            [f"Produkt ID: {p['ID']}, Name: {p['Name']}, Beschreibung: {p['Description']}, Preis: {p['Price']}€" for p in search_results]
        )
    else:
        context = "Das weiß ich nicht."
    # End product search span with results
    retrieval_span.end(
        output={"search_results": search_results},
        status_message=f"Found {len(search_results)} products"
    )
    # Update trace with search context
    langfuse_context.update_current_observation(
        input={"query": user_input},
        output={"context": context},
        metadata={"search_results_found": len(search_results)}
    )
    # Generate prompt for Llama model
    prompt = construct_prompt(user_input, context, chat_history)
    # The prompt already starts with <|begin_of_text|>, so skip the tokenizer's own special tokens
    input_ids = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False, truncation=True, max_length=4096)
    # Span for AI generation
    generation_span = trace.span(
        name="ai-response-generation",
        metadata={"model": "Llama-3.2-3B-Instruct"},
        input={"prompt": prompt}
    )
    # Sampled generation; pad_token_id is set explicitly because Llama tokenizers define no pad token
    outputs = model.generate(input_ids, max_new_tokens=1200, do_sample=True, top_k=50, temperature=0.7, pad_token_id=tokenizer.eos_token_id)
    # Decode only the newly generated tokens, dropping the echoed prompt
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()
    # Defensive cleanup in case an assistant marker survives decoding
    response = response.replace("<|assistant|>", "").strip()
    # End model generation span
    generation_span.end(
        output={"response": response},
        status_message="AI response generated"
    )
    # Update Langfuse context with usage details; count response tokens rather than characters
    langfuse_context.update_current_observation(
        usage_details={
            "input_tokens": input_ids.shape[-1],
            "output_tokens": len(tokenizer.encode(response, add_special_tokens=False))
        }
    )
    # Append the latest turn to the chat history
    chat_history.append((user_input, response))
    # Update trace final output
    trace.update(
        metadata={"final_status": "completed"},
        output={"summary": response}
    )
    # Return the response
    return response, chat_history
# Gradio interface
def gradio_interface(user_input, history):
    response, updated_history = chat_with_model(user_input, history)
    return response, updated_history
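# Minimal Blocks UI: a single response box, with gr.State carrying the running history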
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 Llama Instruct Chat with LangFuse & Faiss Integration")
    user_input = gr.Textbox(label="Your Message", lines=2)
    submit_btn = gr.Button("Send")
    chat_history = gr.State([])
    chat_display = gr.Textbox(label="Chat Response", lines=10, interactive=False)
    submit_btn.click(gradio_interface, inputs=[user_input, chat_history], outputs=[chat_display, chat_history])
demo.launch(debug=True)