Update app.py
Caching (Persistent Storage)
A new JSON-based cache (query_cache.json) is loaded at startup and saved to disk whenever a new answer is generated.
Before doing any retrieval or calls to the LLM, we check if a similar query (by cosine similarity of embeddings) is in the cache.
If the similarity is above SIMILARITY_THRESHOLD_CACHE, we return that cached answer immediately (“cache hit”).
Otherwise, we proceed as before and store the new answer in the cache for fast future retrieval.
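For reference, each cache entry is keyed by an MD5 hash of the query text and stores the query, the final answer, and the query's embedding, so query_cache.json looks roughly like this (a sketch with invented values; a real embedding has the full dimensionality of the embedding model):

# Illustrative shape of one query_cache.json entry (values invented, embedding truncated)
{
    "md5-of-query-text": {
        "query": "What is box breathing?",
        "answer": "**Daily Wellness AI** ...",
        "embedding": [0.0132, -0.0481, 0.0917]
    }
}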
Detail Control
Added a detail parameter to handle_query() and expand() that adjusts the prompt toward either a concise or an in-depth answer.
In the Gradio UI, a checkbox toggles “In-Depth Answer?” for a more detailed response.
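For example, a usage sketch (assuming the handle_query signature from the diff below):

concise = handle_query("What is box breathing?")                 # detail defaults to False
in_depth = handle_query("What is box breathing?", detail=True)   # expand() adds elaboration instructions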
Performance Improvements
Queries that closely match a cached entry short-circuit the pipeline, skipping retrieval, cross-encoder re-ranking, and the LLM call.
The rest of the logic is unchanged, so you still get the same retrieval flow and expansions if not served from the cache.
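As a minimal, self-contained sketch of that similarity gate (the model name is an assumption for illustration; the app reuses its own embedding_model):

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model, illustration only

new_emb = model.encode("How does box breathing reduce stress?", convert_to_tensor=True)
cached_emb = model.encode("What is box breathing and how does it help reduce anxiety?", convert_to_tensor=True)

score = util.pytorch_cos_sim(new_emb, cached_emb).item()
if score >= 0.8:  # SIMILARITY_THRESHOLD_CACHE
    print("cache hit: return the stored answer, skipping retrieval, re-ranking, and the LLM call")
else:
    print("cache miss: run the full pipeline and cache the new answer")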
@@ -5,6 +5,9 @@ import pandas as pd
 import chardet
 import logging
 import gradio as gr
+import json
+import hashlib
+import numpy as np  # ADDED for easy array handling
 from typing import Optional, List, Tuple, ClassVar, Dict
 
 from sentence_transformers import SentenceTransformer, util, CrossEncoder
@@ -237,19 +240,35 @@ class AnswerExpander:
     def __init__(self, llm: GeminiLLM):
         self.llm = llm
 
-    def expand(self, query: str, retrieved_answers: List[str]) -> str:
+    def expand(self, query: str, retrieved_answers: List[str], detail: bool = False) -> str:
+        """
+        Synthesize answers into a single cohesive response.
+        If detail=True, provide a more detailed response.
+        """
         try:
-            reference_block = "\n".join(
+            reference_block = "\n".join(
+                f"- {idx}) {ans}" for idx, ans in enumerate(retrieved_answers, start=1)
+            )
+
+            # ADDED: More elaboration if detail=True
+            detail_instructions = (
+                "Provide a thorough, in-depth explanation, adding relevant tips and context, "
+                "while remaining creative and brand-aligned. "
+                if detail else
+                "Please provide a concise response in no more than 4 sentences."
+            )
+
             prompt = (
                 f"You are Daily Wellness AI, a friendly wellness expert. Below are multiple "
                 f"potential answers retrieved from a local knowledge base. You have a user question.\n\n"
                 f"Question: {query}\n\n"
                 f"Retrieved Answers:\n{reference_block}\n\n"
-                "Please synthesize these references into a single cohesive, creative, and brand-aligned response. "
-                "
-                "
+                f"Please synthesize these references into a single cohesive, creative, and brand-aligned response. "
+                f"{detail_instructions} "
+                f"End with a short inspirational note.\n\n"
                 "Disclaimer: This is general wellness information, not a substitute for professional medical advice."
             )
+
             logger.debug(f"Generated prompt for answer expansion: {prompt}")
             response = self.llm._call(prompt)
             logger.debug(f"Expanded answer: {response}")
@@ -262,24 +281,110 @@ class AnswerExpander:
 answer_expander = AnswerExpander(llm)
 
 ###############################################################################
-# 9) Query Handling
+# 9) Persistent Cache (ADDED)
 ###############################################################################
-def handle_query(query: str) -> str:
+CACHE_FILE = "query_cache.json"
+SIMILARITY_THRESHOLD_CACHE = 0.8  # Adjust for how close a query must be to reuse cache
+
+def load_cache() -> Dict:
+    """Load the cache from the local JSON file."""
+    if os.path.isfile(CACHE_FILE):
+        try:
+            with open(CACHE_FILE, "r", encoding="utf-8") as f:
+                return json.load(f)
+        except Exception as e:
+            logger.error(f"Failed to load cache file: {e}")
+            return {}
+    return {}
+
+def save_cache(cache_data: Dict):
+    """Save the cache dictionary to a local JSON file."""
+    try:
+        with open(CACHE_FILE, "w", encoding="utf-8") as f:
+            json.dump(cache_data, f, ensure_ascii=False, indent=2)
+    except Exception as e:
+        logger.error(f"Failed to save cache file: {e}")
+
+def compute_hash(text: str) -> str:
+    """Compute a simple hash for the text to handle duplicates in a consistent way."""
+    return hashlib.md5(text.encode("utf-8")).hexdigest()
+
+# ADDED: Load cache at startup
+cache_store = load_cache()
+
+###############################################################################
+# 9.1) Utility to attempt cached retrieval (ADDED)
+###############################################################################
+def get_cached_answer(query: str) -> Optional[str]:
+    """
+    Returns a cached answer if there's a very similar query in the cache.
+    We'll compare embeddings to find if a stored query is above threshold.
+    """
+    if not cache_store:
+        return None
+
+    # Compute embedding for the incoming query
+    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
+
+    # Check all cached items
+    best_score = 0.0
+    best_answer = None
+
+    for cached_q, cache_data in cache_store.items():
+        stored_embedding = np.array(cache_data["embedding"], dtype=np.float32)
+        score = util.pytorch_cos_sim(query_embedding, stored_embedding)[0].item()
+        if score > best_score:
+            best_score = score
+            best_answer = cache_data["answer"]
+
+    if best_score >= SIMILARITY_THRESHOLD_CACHE:
+        logger.info(f"Cache hit! Similarity: {best_score:.2f}, returning cached answer.")
+        return best_answer
+    return None
+
+def store_in_cache(query: str, answer: str):
+    """
+    Store a query-answer pair in the cache with the query's embedding.
+    """
+    query_embedding = embedding_model.encode(query, convert_to_tensor=True).cpu().tolist()
+    cache_key = compute_hash(query)
+    cache_store[cache_key] = {
+        "query": query,
+        "answer": answer,
+        "embedding": query_embedding
+    }
+    save_cache(cache_store)
+
+###############################################################################
+# 10) Query Handling
+###############################################################################
+def handle_query(query: str, detail: bool = False) -> str:
+    """
+    Main function to process the query.
+    :param query: The user's question.
+    :param detail: Whether the user wants a more detailed response.
+    :return: Response string from Daily Wellness AI.
+    """
     if not query or not isinstance(query, str) or len(query.strip()) == 0:
         return "Please provide a valid question."
 
     try:
-        # Sanity Check: Determine if the question is relevant to daily wellness
+        # 1) Check the cache first (ADDED for speed)
+        cached_answer = get_cached_answer(query)
+        if cached_answer:
+            return cached_answer
+
+        # 2) Sanity Check: Determine if the question is relevant to daily wellness
         is_relevant = sanity_checker.is_relevant(query)
         if not is_relevant:
            return "Your question seems out of context or not related to daily wellness. Please ask a wellness-related question."
 
-        # Proceed with retrieval
+        # 3) Proceed with retrieval
         retrieved = retriever.retrieve(query)
         if not retrieved:
             return "I'm sorry, I couldn't find an answer to your question."
 
-        # Check similarity threshold
+        # 4) Check similarity threshold
         top_score = retrieved[0][1]  # Assuming the list is sorted descending
         similarity_threshold = 0.3  # Adjust this threshold based on empirical results
 
@@ -291,16 +396,22 @@ def handle_query(query: str) -> str:
 
             # Optionally, process the web_search_response if needed
             # For simplicity, return the web search response directly
-            return (
+            answer = (
                 f"**Daily Wellness AI**\n\n"
                 f"{web_search_response}\n\n"
                 "Disclaimer: This information is retrieved from the web and is not a substitute for professional medical advice.\n\n"
                 "Wishing you a calm and wonderful day!"
             )
+            # Store in cache before returning
+            store_in_cache(query, answer)
+            return answer
 
-        # Proceed with answer expansion using retrieved_answers
+        # 5) Proceed with answer expansion using retrieved_answers
         responses = [ans[0] for ans in retrieved]
-        expanded_answer = answer_expander.expand(query, responses)
+        expanded_answer = answer_expander.expand(query, responses, detail=detail)
+
+        # 6) Store in cache (ADDED)
+        store_in_cache(query, expanded_answer)
         return expanded_answer
     except Exception as e:
         logger.error(f"Error handling query: {e}")
@@ -308,11 +419,14 @@ def handle_query(query: str) -> str:
         return "An error occurred while processing your request."
 
 ###############################################################################
-# 10) Gradio Interface
+# 11) Gradio Interface
 ###############################################################################
-def gradio_interface(query: str):
+def gradio_interface(query: str, detail: bool):
+    """
+    Gradio interface function that optionally takes a detail parameter for longer responses.
+    """
     try:
-        response = handle_query(query)
+        response = handle_query(query, detail=detail)
         formatted_response = response  # Response is already formatted
         return formatted_response
     except Exception as e:
@@ -320,28 +434,36 @@ def gradio_interface(query: str):
         logger.debug("Exception details:", exc_info=True)
         return "**An error occurred while processing your request. Please try again later.**"
 
+# ADDED: We now have a checkbox for detail in the Gradio UI
 interface = gr.Interface(
     fn=gradio_interface,
-    inputs=gr.Textbox(
-        lines=2,
-        placeholder="e.g., What is box breathing?",
-        label="Ask Daily Wellness AI"
-    ),
+    inputs=[
+        gr.Textbox(
+            lines=2,
+            placeholder="e.g., What is box breathing?",
+            label="Ask Daily Wellness AI"
+        ),
+        gr.Checkbox(
+            label="In-Depth Answer?",
+            value=False,
+            info="Check for a longer, more detailed response."
+        )
+    ],
     outputs=gr.Markdown(label="Answer from Daily Wellness AI"),
     title="Daily Wellness AI",
-    description="Ask wellness-related questions and receive synthesized, creative answers.",
+    description="Ask wellness-related questions and receive synthesized, creative answers. Optionally request a more in-depth response.",
     theme="default",
     examples=[
-        "What is box breathing and how does it help reduce anxiety?",
-        "Provide a daily wellness schedule incorporating box breathing techniques.",
-        "What are some tips for maintaining good posture while working at a desk?",
-        "Who is the CEO of Hugging Face?"  # Example of an out-of-context question
+        ["What is box breathing and how does it help reduce anxiety?", True],
+        ["Provide a daily wellness schedule incorporating box breathing techniques.", False],
+        ["What are some tips for maintaining good posture while working at a desk?", True],
+        ["Who is the CEO of Hugging Face?", False]  # Example of an out-of-context question
     ],
     allow_flagging="never"
 )
 
 ###############################################################################
-# 11) Launch Gradio
+# 12) Launch Gradio
 ###############################################################################
 if __name__ == "__main__":
     try: