Chris4K committed on
Commit 33962f9 · verified · 1 Parent(s): 16ea01c

Update app.py

Files changed (1)
  1. app.py +31 -0
app.py CHANGED
@@ -4,6 +4,8 @@ import gradio as gr
 
 
 from langfuse import Langfuse
+from langfuse.decorators import observe
+
 
 langfuse = Langfuse(
     secret_key="sk-lf-229e10c5-6210-4a4b-a432-0f17bc66e56c",
@@ -11,6 +13,8 @@ langfuse = Langfuse(
     host="https://chris4k-langfuse-template-space.hf.space"
 )
 
+
+
 # Load Llama 3.2 model
 model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Replace with the exact model path
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -117,6 +121,7 @@ def construct_prompt(user_input, context, chat_history, max_history_turns=1):
     print(prompt)
     return prompt
 
+@observe()
 def chat_with_model(user_input, chat_history=[]):
     # Search for relevant products
     search_results = search_products(user_input)
@@ -136,9 +141,26 @@ def chat_with_model(user_input, chat_history=[]):
     else:
         context = "Das weiß ich nicht."
     print("context: ------------------------------------- \n"+context)
+
+    langfuse.observe(
+        name="search_products",
+        input={"query": user_input},
+        output={"context": context},
+        metadata={"search_results_found": len(search_results) if search_results else 0}
+    )
+
+
     # Pass both user_input and context to construct_prompt
     prompt = construct_prompt(user_input, context, chat_history)  # This line is changed
     print("prompt: ------------------------------------- \n"+prompt)
+
+    # LangFuse observation: Log prompt construction
+    langfuse.observe(
+        name="construct_prompt",
+        input={"user_input": user_input, "context": context, "chat_history": chat_history},
+        output={"prompt": prompt}
+    )
+
     input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=4096).to("cpu")
     tokenizer.pad_token = tokenizer.eos_token
     attention_mask = torch.ones_like(input_ids).to("cpu")
@@ -146,7 +168,16 @@ def chat_with_model(user_input, chat_history=[]):
                              max_new_tokens=1200, do_sample=True,
                              top_k=50, temperature=0.7)
     response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
+
+
     print("respone: ------------------------------------- \n"+response)
+    # LangFuse observation: Log LLM response
+    langfuse.observe(
+        name="llm_response",
+        input={"prompt": prompt},
+        output={"response": response},
+        metadata={"response_length": len(response)}
+    )
     chat_history.append((context, response))  # or chat_history.append((user_input, response)) if you want to store user input
     return response, chat_history
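
A note on the instrumentation added above: in the Langfuse v2 Python SDK, the @observe() decorator (the import this commit adds) wraps a function in a trace, and intermediate data is normally attached through langfuse_context or the client's trace()/span() methods; a client-level langfuse.observe(...) call may not be available in that SDK version. The sketch below is an illustration only, not part of the commit: it shows one plausible way to record the same three steps (retrieval, prompt construction, generation) with decorator calls the v2 SDK documents. traced_search and generate_response are hypothetical wrapper names, and search_products, construct_prompt, tokenizer, and model are assumed to be the objects already defined in app.py.

# Sketch only (not part of this commit). Assumes the Langfuse v2 Python SDK:
# @observe() from langfuse.decorators creates a trace/span per decorated call,
# and langfuse_context attaches inputs, outputs, and metadata to it.
# search_products, construct_prompt, tokenizer, and model are taken from app.py;
# traced_search and generate_response are hypothetical helper names.
import torch
from langfuse.decorators import observe, langfuse_context

@observe()  # nested span: retrieval step
def traced_search(user_input):
    search_results = search_products(user_input)
    langfuse_context.update_current_observation(
        metadata={"search_results_found": len(search_results) if search_results else 0}
    )
    return search_results

@observe()  # nested span: model call
def generate_response(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors="pt",
                                 truncation=True, max_length=4096).to("cpu")
    attention_mask = torch.ones_like(input_ids).to("cpu")
    outputs = model.generate(input_ids, attention_mask=attention_mask,
                             max_new_tokens=1200, do_sample=True,
                             top_k=50, temperature=0.7)
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

@observe()  # parent trace: one chat turn
def chat_with_model(user_input, chat_history=[]):
    search_results = traced_search(user_input)
    context = ...  # build the context string from search_results as app.py already does
    prompt = construct_prompt(user_input, context, chat_history)
    response = generate_response(prompt)
    langfuse_context.update_current_trace(
        input={"user_input": user_input},
        output={"response": response},
    )
    chat_history.append((user_input, response))
    return response, chat_history

If client-level calls are preferred over decorators, the v2 SDK's langfuse.trace(...), trace.span(...), and trace.generation(...) methods should accept the same name/input/output/metadata fields used in the commit, with langfuse.flush() ensuring events are sent before the process exits.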