Chris4K committed on
Commit f2ae40f · verified · 1 Parent(s): 92251bc

Update app.py

Files changed (1):
  app.py (+46 -145)
app.py CHANGED
@@ -1,204 +1,105 @@
--- app.py (before)

  from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
  import gradio as gr
-
-
  from langfuse import Langfuse
- from langfuse.decorators import observe
-

  langfuse = Langfuse(
-     secret_key="sk-lf-229e10c5-6210-4a4b-a432-0f17bc66e56c",
-     public_key="pk-lf-9f2c32d2-266f-421d-9b87-51377f0a268c",
-     host="https://chris4k-langfuse-template-space.hf.space"
  )

-
-
- # Load Llama 3.2 model
  model_name = "meta-llama/Llama-3.2-3B-Instruct" # Replace with the exact model path
  tokenizer = AutoTokenizer.from_pretrained(model_name)
- #model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
  model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None, torch_dtype=torch.float32)

- # Helper function to process long contexts
- MAX_TOKENS = 100000 # Replace with the max token limit of the Llama model
-
-
- #########
- ###
- #########
  import faiss
- import torch
  import pandas as pd
  from sentence_transformers import SentenceTransformer
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import gradio as gr
-
- # Load Llama model
- #model_name = "meta-llama/Llama-3.2-3B-Instruct" # Replace with the exact model path
- #tokenizer = AutoTokenizer.from_pretrained(model_name)
- #model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

- # Load Sentence Transformer model for embeddings
- embedder = SentenceTransformer('distiluse-base-multilingual-cased') # Suitable for German text
-
- ########
- ###
- ###
- #####
- # Load the CSV data
  url = 'https://www.bofrost.de/datafeed/DE/products.csv'
  data = pd.read_csv(url, sep='|')

- # List of columns to keep
- columns_to_keep = [
-     'ID', 'Name', 'Description', 'Price',
-     'ProductCategory', 'Grammage',
-     'BasePriceText', 'Rating', 'RatingCount',
-     'Ingredients', 'CreationDate', 'Keywords', 'Brand'
- ]
-
- # Filter the DataFrame
  data_cleaned = data[columns_to_keep]
-
- # Remove unwanted characters from the 'Description' column
  data_cleaned['Description'] = data_cleaned['Description'].str.replace(r'[^\w\s.,;:\'"/?!€$%&()\[\]{}<>|=+\\-]', ' ', regex=True)
-
- # Combine relevant text columns for embedding
  data_cleaned['combined_text'] = data_cleaned.apply(lambda row: ' '.join([str(row[col]) for col in ['Name', 'Description', 'Keywords'] if pd.notnull(row[col])]), axis=1)

- ######
- ##
- #####
-
- # Generate embeddings for the combined text
- embeddings = embedder.encode(data_cleaned['combined_text'].tolist(), convert_to_tensor=True)
-
- # Convert embeddings to numpy array
- embeddings = embeddings.cpu().detach().numpy()
-
- # Initialize FAISS index
- d = embeddings.shape[1] # Dimension of embeddings
- faiss_index = faiss.IndexFlatL2(d)
-
- # Add embeddings to the index
  faiss_index.add(embeddings)

- #######
- ##
- ######
  def search_products(query, top_k=7):
-     # Generate embedding for the query
      query_embedding = embedder.encode([query], convert_to_tensor=True).cpu().detach().numpy()
-
-     # Search FAISS index
      distances, indices = faiss_index.search(query_embedding, top_k)

-     # Retrieve corresponding products
-     results = data_cleaned.iloc[indices[0]].to_dict(orient='records')
-     return results
-

-
- # Update the prompt construction to include ChromaDB results
- def construct_system_prompt( context):
-     prompt = f"You are a friendly bot specializing in Bofrost products. Return comprehensive german answers. Always add product ids. Use the following product descriptions:\n\n{context}\n\n"
-     return prompt
-
- # Helper function to construct the prompt
- def construct_prompt(user_input, context, chat_history, max_history_turns=1): # Added max_history_turns
      system_message = construct_system_prompt(context)
      prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
-
-     # Limit history to the last max_history_turns
-     for i, (user_msg, assistant_msg) in enumerate(chat_history[-max_history_turns:]):
          prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
          prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
-
      prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
-     print("-------------------------")
-     print(prompt)
      return prompt

-
  def chat_with_model(user_input, chat_history=[]):
-     # Search for relevant products
      search_results = search_products(user_input)
-
-     # Create context with search results
      if search_results:
-         context = "Product Context:\n"
-         for product in search_results:
-             context += f"Produkt ID: {product['ID']}\n"
-             context += f"Name: {product['Name']}\n"
-             context += f"Beschreibung: {product['Description']}\n"
-             context += f"Preis: {product['Price']}€\n"
-             context += f"Bewertung: {product['Rating']} ({product['RatingCount']} Bewertungen)\n"
-             context += f"Kategorie: {product['ProductCategory']}\n"
-             context += f"Marke: {product['Brand']}\n"
-             context += "---\n"
      else:
          context = "Das weiß ich nicht."
-     print("context: ------------------------------------- \n"+context)
-
-     # LangFuse observation: Search Results
-     langfuse.observe(
-         name="search_products",
          input={"query": user_input},
          output={"context": context},
-         metadata={"results_count": len(search_results)}
      )

-
-
-     # Pass both user_input and context to construct_prompt
-     prompt = construct_prompt(user_input, context, chat_history) # This line is changed
-     print("prompt: ------------------------------------- \n"+prompt)
-
-     # LangFuse observation: Prompt Construction
-     langfuse.observe(
-         name="construct_prompt",
-         input={"user_input": user_input, "context": context},
-         output={"prompt": prompt}
      )

-     input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=4096).to("cpu")
-     tokenizer.pad_token = tokenizer.eos_token
-     attention_mask = torch.ones_like(input_ids).to("cpu")
-     outputs = model.generate(input_ids, attention_mask=attention_mask,
-                              max_new_tokens=1200, do_sample=True,
-                              top_k=50, temperature=0.7)
-     response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
-
-
-     print("respone: ------------------------------------- \n"+response)
-     # LangFuse observation: Log LLM response
-     # LangFuse observation: LLM Response
-     langfuse.observe(
-         name="llm_response",
-         input={"prompt": prompt},
-         output={"response": response},
-         metadata={"response_length": len(response)}
-     )
-     chat_history.append((context, response)) # or chat_history.append((user_input, response)) if you want to store user input
      return response, chat_history

- #####
- ###
- ###
- # Gradio Interface
  def gradio_interface(user_input, history):
      response, updated_history = chat_with_model(user_input, history)
      return response, updated_history

  with gr.Blocks() as demo:
-     gr.Markdown("# 🦙 Llama Instruct Chat with ChromaDB Integration")
-     with gr.Row():
-         user_input = gr.Textbox(label="Your Message", lines=2, placeholder="Type your message here...")
-         submit_btn = gr.Button("Send")
      chat_history = gr.State([])
-     chat_display = gr.Textbox(label="Chat Response", lines=10, placeholder="Chat history will appear here...", interactive=False)
      submit_btn.click(gradio_interface, inputs=[user_input, chat_history], outputs=[chat_display, chat_history])

  demo.launch(debug=True)
 
+++ app.py (after)

  from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
  import gradio as gr

  from langfuse import Langfuse
+ from langfuse.decorators import observe, langfuse_context

+ # Initialize Langfuse
  langfuse = Langfuse(
+     secret_key="sk-lf-229e10c5-6210-4a4b-a432-0f17bc66e56c",
+     public_key="pk-lf-9f2c32d2-266f-421d-9b87-51377f0a268c",
+     host="https://chris4k-langfuse-template-space.hf.space"
  )

+ # Load the Llama model
  model_name = "meta-llama/Llama-3.2-3B-Instruct" # Replace with the exact model path
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None, torch_dtype=torch.float32)

+ # Load FAISS and Embeddings
  import faiss
  import pandas as pd
  from sentence_transformers import SentenceTransformer

+ embedder = SentenceTransformer('distiluse-base-multilingual-cased')

  url = 'https://www.bofrost.de/datafeed/DE/products.csv'
  data = pd.read_csv(url, sep='|')

+ # Clean and process the dataset
+ columns_to_keep = ['ID', 'Name', 'Description', 'Price', 'ProductCategory', 'Grammage', 'BasePriceText', 'Rating', 'RatingCount', 'Ingredients', 'CreationDate', 'Keywords', 'Brand']
  data_cleaned = data[columns_to_keep]
  data_cleaned['Description'] = data_cleaned['Description'].str.replace(r'[^\w\s.,;:\'"/?!€$%&()\[\]{}<>|=+\\-]', ' ', regex=True)
  data_cleaned['combined_text'] = data_cleaned.apply(lambda row: ' '.join([str(row[col]) for col in ['Name', 'Description', 'Keywords'] if pd.notnull(row[col])]), axis=1)

+ # Generate and add embeddings
+ embeddings = embedder.encode(data_cleaned['combined_text'].tolist(), convert_to_tensor=True).cpu().detach().numpy()
+ faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
  faiss_index.add(embeddings)

+ # Helper function for searching products
  def search_products(query, top_k=7):
      query_embedding = embedder.encode([query], convert_to_tensor=True).cpu().detach().numpy()
      distances, indices = faiss_index.search(query_embedding, top_k)
+     return data_cleaned.iloc[indices[0]].to_dict(orient='records')

+ # Prompt construction functions
+ def construct_system_prompt(context):
+     return f"You are a friendly bot specializing in Bofrost products. Return comprehensive German answers. Always add product IDs. Use the following product descriptions:\n\n{context}\n\n"

+ def construct_prompt(user_input, context, chat_history, max_history_turns=1):
      system_message = construct_system_prompt(context)
      prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
+     for user_msg, assistant_msg in chat_history[-max_history_turns:]:
          prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
          prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
      prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
      return prompt

+ # Main function to interact with the model
+ @observe()
  def chat_with_model(user_input, chat_history=[]):
+     # Search for products
      search_results = search_products(user_input)
      if search_results:
+         context = "Product Context:\n" + "\n".join(
+             [f"Produkt ID: {p['ID']}, Name: {p['Name']}, Beschreibung: {p['Description']}, Preis: {p['Price']}€" for p in search_results]
+         )
      else:
          context = "Das weiß ich nicht."
+
+     langfuse_context.update_current_observation(
          input={"query": user_input},
          output={"context": context},
+         metadata={"search_results_found": len(search_results)}
      )

+     # Generate prompt
+     prompt = construct_prompt(user_input, context, chat_history)
+     input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=4096)
+     outputs = model.generate(input_ids, max_new_tokens=1200, do_sample=True, top_k=50, temperature=0.7)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     langfuse_context.update_current_observation(
+         usage_details={
+             "input_tokens": len(input_ids[0]),
+             "output_tokens": len(response)
+         }
      )

+     chat_history.append((user_input, response))
      return response, chat_history

+ # Gradio interface
  def gradio_interface(user_input, history):
      response, updated_history = chat_with_model(user_input, history)
      return response, updated_history

  with gr.Blocks() as demo:
+     gr.Markdown("# 🦙 Llama Instruct Chat with LangFuse Integration")
+     user_input = gr.Textbox(label="Your Message", lines=2)
+     submit_btn = gr.Button("Send")
      chat_history = gr.State([])
+     chat_display = gr.Textbox(label="Chat Response", lines=10, interactive=False)
      submit_btn.click(gradio_interface, inputs=[user_input, chat_history], outputs=[chat_display, chat_history])

  demo.launch(debug=True)
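
Note on the tracing change: the update replaces direct langfuse.observe(...) calls with Langfuse's decorator-based API, where @observe() opens an observation and langfuse_context.update_current_observation(...) attaches data to it. A minimal sketch of that pattern in isolation, assuming the Langfuse v2 Python decorator SDK as used in the diff (the answer function here is hypothetical):

from langfuse.decorators import observe, langfuse_context

@observe()  # opens a trace/span for each call of the decorated function
def answer(query: str) -> str:
    result = f"echo: {query}"
    # Attach structured input/output/metadata to the active observation,
    # mirroring what chat_with_model does above.
    langfuse_context.update_current_observation(
        input={"query": query},
        output={"result": result},
        metadata={"demo": True},
    )
    return result

print(answer("test"))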
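
One behavioral difference to be aware of: the removed version decoded only the newly generated tokens (outputs[0][input_ids.shape[-1]:]), while the updated version decodes the full sequence, so the prompt text is echoed back inside response; likewise, the logged "output_tokens": len(response) is a character count, not a token count. A minimal sketch of prompt-stripped decoding with an actual token count, reusing the script's global model and tokenizer:

def generate_reply(prompt: str) -> str:
    # Encode the prompt, generate, then decode only the continuation
    # (reuses the script's global `model` and `tokenizer`).
    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=4096)
    outputs = model.generate(input_ids, max_new_tokens=1200, do_sample=True, top_k=50, temperature=0.7)
    new_tokens = outputs[0][input_ids.shape[-1]:]   # tokens after the prompt
    output_token_count = new_tokens.shape[-1]       # token count, not characters
    return tokenizer.decode(new_tokens, skip_special_tokens=True)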