Phoenix21 committed on
Commit
95ca499
·
verified ·
1 Parent(s): 1c73b9c

Update app.py

Browse files

Caching (Persistent Storage)

A new JSON-based cache (query_cache.json) is loaded at startup and saved upon new queries.
Before doing any retrieval or calls to the LLM, we check if a similar query (by cosine similarity of embeddings) is in the cache.
If the similarity is above SIMILARITY_THRESHOLD_CACHE, we return that cached answer immediately (“cache hit”).
Otherwise, we proceed as before and store the new answer to the cache for future fast retrieval.
Detail Control

Added a detail parameter to handle_query() and expand() that modifies the prompt.
In the Gradio UI, a checkbox toggles “In-Depth Answer?” for a more detailed response.
Performance Improvements

Short-circuit queries with high cache similarity to avoid embedding/cross-encoder re-computation and LLM calls.
The rest of the logic is unchanged, so you still get the same retrieval flow and expansions if not served from the cache.

Files changed (1) hide show
  1. app.py +149 -27
app.py CHANGED
@@ -5,6 +5,9 @@ import pandas as pd
5
  import chardet
6
  import logging
7
  import gradio as gr
 
 
 
8
  from typing import Optional, List, Tuple, ClassVar, Dict
9
 
10
  from sentence_transformers import SentenceTransformer, util, CrossEncoder
@@ -237,19 +240,35 @@ class AnswerExpander:
237
  def __init__(self, llm: GeminiLLM):
238
  self.llm = llm
239
 
240
- def expand(self, query: str, retrieved_answers: List[str]) -> str:
 
 
 
 
241
  try:
242
- reference_block = "\n".join(f"- {idx+1}) {ans}" for idx, ans in enumerate(retrieved_answers, start=1))
 
 
 
 
 
 
 
 
 
 
 
243
  prompt = (
244
  f"You are Daily Wellness AI, a friendly wellness expert. Below are multiple "
245
  f"potential answers retrieved from a local knowledge base. You have a user question.\n\n"
246
  f"Question: {query}\n\n"
247
  f"Retrieved Answers:\n{reference_block}\n\n"
248
- "Please synthesize these references into a single cohesive, creative, and brand-aligned response. "
249
- "Add practical tips and positivity, and end with a short inspirational note. "
250
- "Please provide a concise response in no more than 4 sentences.\n\n"
251
  "Disclaimer: This is general wellness information, not a substitute for professional medical advice."
252
  )
 
253
  logger.debug(f"Generated prompt for answer expansion: {prompt}")
254
  response = self.llm._call(prompt)
255
  logger.debug(f"Expanded answer: {response}")
@@ -262,24 +281,110 @@ class AnswerExpander:
262
  answer_expander = AnswerExpander(llm)
263
 
264
  ###############################################################################
265
- # 9) Query Handling
266
  ###############################################################################
267
- def handle_query(query: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  if not query or not isinstance(query, str) or len(query.strip()) == 0:
269
  return "Please provide a valid question."
270
 
271
  try:
272
- # Sanity Check: Determine if the question is relevant to daily wellness
 
 
 
 
 
273
  is_relevant = sanity_checker.is_relevant(query)
274
  if not is_relevant:
275
  return "Your question seems out of context or not related to daily wellness. Please ask a wellness-related question."
276
 
277
- # Proceed with retrieval
278
  retrieved = retriever.retrieve(query)
279
  if not retrieved:
280
  return "I'm sorry, I couldn't find an answer to your question."
281
 
282
- # Check similarity threshold
283
  top_score = retrieved[0][1] # Assuming the list is sorted descending
284
  similarity_threshold = 0.3 # Adjust this threshold based on empirical results
285
 
@@ -291,16 +396,22 @@ def handle_query(query: str) -> str:
291
 
292
  # Optionally, process the web_search_response if needed
293
  # For simplicity, return the web search response directly
294
- return (
295
  f"**Daily Wellness AI**\n\n"
296
  f"{web_search_response}\n\n"
297
  "Disclaimer: This information is retrieved from the web and is not a substitute for professional medical advice.\n\n"
298
  "Wishing you a calm and wonderful day!"
299
  )
 
 
 
300
 
301
- # Proceed with answer expansion using retrieved_answers
302
  responses = [ans[0] for ans in retrieved]
303
- expanded_answer = answer_expander.expand(query, responses)
 
 
 
304
  return expanded_answer
305
  except Exception as e:
306
  logger.error(f"Error handling query: {e}")
@@ -308,11 +419,14 @@ def handle_query(query: str) -> str:
308
  return "An error occurred while processing your request."
309
 
310
  ###############################################################################
311
- # 10) Gradio Interface
312
  ###############################################################################
313
- def gradio_interface(query: str):
 
 
 
314
  try:
315
- response = handle_query(query)
316
  formatted_response = response # Response is already formatted
317
  return formatted_response
318
  except Exception as e:
@@ -320,28 +434,36 @@ def gradio_interface(query: str):
320
  logger.debug("Exception details:", exc_info=True)
321
  return "**An error occurred while processing your request. Please try again later.**"
322
 
 
323
  interface = gr.Interface(
324
  fn=gradio_interface,
325
- inputs=gr.Textbox(
326
- lines=2,
327
- placeholder="e.g., What is box breathing?",
328
- label="Ask Daily Wellness AI"
329
- ),
 
 
 
 
 
 
 
330
  outputs=gr.Markdown(label="Answer from Daily Wellness AI"),
331
  title="Daily Wellness AI",
332
- description="Ask wellness-related questions and receive synthesized, creative answers.",
333
  theme="default",
334
  examples=[
335
- "What is box breathing and how does it help reduce anxiety?",
336
- "Provide a daily wellness schedule incorporating box breathing techniques.",
337
- "What are some tips for maintaining good posture while working at a desk?",
338
- "Who is the CEO of Hugging Face?" # Example of an out-of-context question
339
  ],
340
  allow_flagging="never"
341
  )
342
 
343
  ###############################################################################
344
- # 11) Launch Gradio
345
  ###############################################################################
346
  if __name__ == "__main__":
347
  try:
 
5
  import chardet
6
  import logging
7
  import gradio as gr
8
+ import json
9
+ import hashlib
10
+ import numpy as np # ADDED for easy array handling
11
  from typing import Optional, List, Tuple, ClassVar, Dict
12
 
13
  from sentence_transformers import SentenceTransformer, util, CrossEncoder
 
240
  def __init__(self, llm: GeminiLLM):
241
  self.llm = llm
242
 
243
+ def expand(self, query: str, retrieved_answers: List[str], detail: bool = False) -> str:
244
+ """
245
+ Synthesize answers into a single cohesive response.
246
+ If detail=True, provide a more detailed response.
247
+ """
248
  try:
249
+ reference_block = "\n".join(
250
+ f"- {idx+1}) {ans}" for idx, ans in enumerate(retrieved_answers, start=1)
251
+ )
252
+
253
+ # ADDED: More elaboration if detail=True
254
+ detail_instructions = (
255
+ "Provide a thorough, in-depth explanation, adding relevant tips and context, "
256
+ "while remaining creative and brand-aligned. "
257
+ if detail else
258
+ "Please provide a concise response in no more than 4 sentences."
259
+ )
260
+
261
  prompt = (
262
  f"You are Daily Wellness AI, a friendly wellness expert. Below are multiple "
263
  f"potential answers retrieved from a local knowledge base. You have a user question.\n\n"
264
  f"Question: {query}\n\n"
265
  f"Retrieved Answers:\n{reference_block}\n\n"
266
+ f"Please synthesize these references into a single cohesive, creative, and brand-aligned response. "
267
+ f"{detail_instructions} "
268
+ f"End with a short inspirational note.\n\n"
269
  "Disclaimer: This is general wellness information, not a substitute for professional medical advice."
270
  )
271
+
272
  logger.debug(f"Generated prompt for answer expansion: {prompt}")
273
  response = self.llm._call(prompt)
274
  logger.debug(f"Expanded answer: {response}")
 
281
  answer_expander = AnswerExpander(llm)
282
 
283
  ###############################################################################
284
+ # 9) Persistent Cache (ADDED)
285
  ###############################################################################
286
+ CACHE_FILE = "query_cache.json"
287
+ SIMILARITY_THRESHOLD_CACHE = 0.8 # Adjust for how close a query must be to reuse cache
288
+
289
def load_cache() -> Dict:
    """Read the persistent query cache from CACHE_FILE, returning {} when the
    file is absent or unreadable (a corrupt cache must never break startup)."""
    if not os.path.isfile(CACHE_FILE):
        return {}
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as exc:
        logger.error(f"Failed to load cache file: {exc}")
        return {}
299
+
300
def save_cache(cache_data: Dict):
    """Persist the cache dictionary to CACHE_FILE as pretty-printed UTF-8 JSON.

    Failures are logged rather than raised so a full disk or permission
    problem cannot take down query handling.
    """
    try:
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(cache_data, f, ensure_ascii=False, indent=2)
    except Exception as exc:
        logger.error(f"Failed to save cache file: {exc}")
307
+
308
def compute_hash(text: str) -> str:
    """Return a stable MD5 hex digest of *text*, used as a duplicate-safe cache key."""
    digest = hashlib.md5(text.encode("utf-8"))
    return digest.hexdigest()
311
+
312
+ # ADDED: Load cache at startup
313
+ cache_store = load_cache()
314
+
315
+ ###############################################################################
316
+ # 9.1) Utility to attempt cached retrieval (ADDED)
317
+ ###############################################################################
318
def get_cached_answer(query: str) -> Optional[str]:
    """
    Return a previously cached answer whose stored query embedding has
    cosine similarity >= SIMILARITY_THRESHOLD_CACHE with *query*,
    or None when no cached entry is close enough.
    """
    if not cache_store:
        return None

    # Embed the incoming query once; every cached entry is compared against it.
    incoming = embedding_model.encode(query, convert_to_tensor=True)

    top_score = 0.0
    top_answer = None
    for entry in cache_store.values():
        stored = np.array(entry["embedding"], dtype=np.float32)
        sim = util.pytorch_cos_sim(incoming, stored)[0].item()
        if sim > top_score:
            top_score = sim
            top_answer = entry["answer"]

    if top_score < SIMILARITY_THRESHOLD_CACHE:
        return None
    logger.info(f"Cache hit! Similarity: {top_score:.2f}, returning cached answer.")
    return top_answer
344
+
345
def store_in_cache(query: str, answer: str):
    """Record a query/answer pair (plus the query's embedding) in the in-memory
    cache, keyed by the query's hash, then persist the cache to disk."""
    # .cpu().tolist() makes the embedding JSON-serializable for the cache file.
    embedding_as_list = embedding_model.encode(query, convert_to_tensor=True).cpu().tolist()
    key = compute_hash(query)
    cache_store[key] = {
        "query": query,
        "answer": answer,
        "embedding": embedding_as_list
    }
    save_cache(cache_store)
357
+
358
+ ###############################################################################
359
+ # 10) Query Handling
360
+ ###############################################################################
361
+ def handle_query(query: str, detail: bool = False) -> str:
362
+ """
363
+ Main function to process the query.
364
+ :param query: The user's question.
365
+ :param detail: Whether the user wants a more detailed response.
366
+ :return: Response string from Daily Wellness AI.
367
+ """
368
  if not query or not isinstance(query, str) or len(query.strip()) == 0:
369
  return "Please provide a valid question."
370
 
371
  try:
372
+ # 1) Check the cache first (ADDED for speed)
373
+ cached_answer = get_cached_answer(query)
374
+ if cached_answer:
375
+ return cached_answer
376
+
377
+ # 2) Sanity Check: Determine if the question is relevant to daily wellness
378
  is_relevant = sanity_checker.is_relevant(query)
379
  if not is_relevant:
380
  return "Your question seems out of context or not related to daily wellness. Please ask a wellness-related question."
381
 
382
+ # 3) Proceed with retrieval
383
  retrieved = retriever.retrieve(query)
384
  if not retrieved:
385
  return "I'm sorry, I couldn't find an answer to your question."
386
 
387
+ # 4) Check similarity threshold
388
  top_score = retrieved[0][1] # Assuming the list is sorted descending
389
  similarity_threshold = 0.3 # Adjust this threshold based on empirical results
390
 
 
396
 
397
  # Optionally, process the web_search_response if needed
398
  # For simplicity, return the web search response directly
399
+ answer = (
400
  f"**Daily Wellness AI**\n\n"
401
  f"{web_search_response}\n\n"
402
  "Disclaimer: This information is retrieved from the web and is not a substitute for professional medical advice.\n\n"
403
  "Wishing you a calm and wonderful day!"
404
  )
405
+ # Store in cache before returning
406
+ store_in_cache(query, answer)
407
+ return answer
408
 
409
+ # 5) Proceed with answer expansion using retrieved_answers
410
  responses = [ans[0] for ans in retrieved]
411
+ expanded_answer = answer_expander.expand(query, responses, detail=detail)
412
+
413
+ # 6) Store in cache (ADDED)
414
+ store_in_cache(query, expanded_answer)
415
  return expanded_answer
416
  except Exception as e:
417
  logger.error(f"Error handling query: {e}")
 
419
  return "An error occurred while processing your request."
420
 
421
  ###############################################################################
422
+ # 11) Gradio Interface
423
  ###############################################################################
424
+ def gradio_interface(query: str, detail: bool):
425
+ """
426
+ Gradio interface function that optionally takes a detail parameter for longer responses.
427
+ """
428
  try:
429
+ response = handle_query(query, detail=detail)
430
  formatted_response = response # Response is already formatted
431
  return formatted_response
432
  except Exception as e:
 
434
  logger.debug("Exception details:", exc_info=True)
435
  return "**An error occurred while processing your request. Please try again later.**"
436
 
437
# Gradio UI: a question textbox plus a checkbox that toggles the in-depth answer mode.
_question_box = gr.Textbox(
    lines=2,
    placeholder="e.g., What is box breathing?",
    label="Ask Daily Wellness AI"
)
_detail_toggle = gr.Checkbox(
    label="In-Depth Answer?",
    value=False,
    info="Check for a longer, more detailed response."
)

interface = gr.Interface(
    fn=gradio_interface,
    inputs=[_question_box, _detail_toggle],
    outputs=gr.Markdown(label="Answer from Daily Wellness AI"),
    title="Daily Wellness AI",
    description="Ask wellness-related questions and receive synthesized, creative answers. Optionally request a more in-depth response.",
    theme="default",
    examples=[
        ["What is box breathing and how does it help reduce anxiety?", True],
        ["Provide a daily wellness schedule incorporating box breathing techniques.", False],
        ["What are some tips for maintaining good posture while working at a desk?", True],
        ["Who is the CEO of Hugging Face?", False]  # Example of an out-of-context question
    ],
    allow_flagging="never"
)
464
 
465
  ###############################################################################
466
+ # 12) Launch Gradio
467
  ###############################################################################
468
  if __name__ == "__main__":
469
  try: