Phoenix21 committed on
Commit
95ca499
·
verified ·
1 Parent(s): 1c73b9c

Update app.py

Browse files

Caching (Persistent Storage)

A new JSON-based cache (query_cache.json) is loaded at startup and saved upon new queries.
Before doing any retrieval or calls to the LLM, we check if a similar query (by cosine similarity of embeddings) is in the cache.
If the similarity is above SIMILARITY_THRESHOLD_CACHE, we return that cached answer immediately (“cache hit”).
Otherwise, we proceed as before and store the new answer to the cache for future fast retrieval.
Detail Control

Added a detail parameter to handle_query() and expand() that modifies the prompt.
In the Gradio UI, a checkbox toggles “In-Depth Answer?” for a more detailed response.
Performance Improvements

Short-circuit queries with high cache similarity to avoid embedding/cross-encoder re-computation and LLM calls.
The rest of the logic is unchanged, so you still get the same retrieval flow and expansions if not served from the cache.

Files changed (1) hide show
  1. app.py +149 -27
app.py CHANGED
@@ -5,6 +5,9 @@ import pandas as pd
5
  import chardet
6
  import logging
7
  import gradio as gr
 
 
 
8
  from typing import Optional, List, Tuple, ClassVar, Dict
9
 
10
  from sentence_transformers import SentenceTransformer, util, CrossEncoder
@@ -237,19 +240,35 @@ class AnswerExpander:
237
  def __init__(self, llm: GeminiLLM):
238
  self.llm = llm
239
 
240
- def expand(self, query: str, retrieved_answers: List[str]) -> str:
 
 
 
 
241
  try:
242
- reference_block = "\n".join(f"- {idx+1}) {ans}" for idx, ans in enumerate(retrieved_answers, start=1))
 
 
 
 
 
 
 
 
 
 
 
243
  prompt = (
244
  f"You are Daily Wellness AI, a friendly wellness expert. Below are multiple "
245
  f"potential answers retrieved from a local knowledge base. You have a user question.\n\n"
246
  f"Question: {query}\n\n"
247
  f"Retrieved Answers:\n{reference_block}\n\n"
248
- "Please synthesize these references into a single cohesive, creative, and brand-aligned response. "
249
- "Add practical tips and positivity, and end with a short inspirational note. "
250
- "Please provide a concise response in no more than 4 sentences.\n\n"
251
  "Disclaimer: This is general wellness information, not a substitute for professional medical advice."
252
  )
 
253
  logger.debug(f"Generated prompt for answer expansion: {prompt}")
254
  response = self.llm._call(prompt)
255
  logger.debug(f"Expanded answer: {response}")
@@ -262,24 +281,110 @@ class AnswerExpander:
262
  answer_expander = AnswerExpander(llm)
263
 
264
  ###############################################################################
265
- # 9) Query Handling
266
  ###############################################################################
267
- def handle_query(query: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  if not query or not isinstance(query, str) or len(query.strip()) == 0:
269
  return "Please provide a valid question."
270
 
271
  try:
272
- # Sanity Check: Determine if the question is relevant to daily wellness
 
 
 
 
 
273
  is_relevant = sanity_checker.is_relevant(query)
274
  if not is_relevant:
275
  return "Your question seems out of context or not related to daily wellness. Please ask a wellness-related question."
276
 
277
- # Proceed with retrieval
278
  retrieved = retriever.retrieve(query)
279
  if not retrieved:
280
  return "I'm sorry, I couldn't find an answer to your question."
281
 
282
- # Check similarity threshold
283
  top_score = retrieved[0][1] # Assuming the list is sorted descending
284
  similarity_threshold = 0.3 # Adjust this threshold based on empirical results
285
 
@@ -291,16 +396,22 @@ def handle_query(query: str) -> str:
291
 
292
  # Optionally, process the web_search_response if needed
293
  # For simplicity, return the web search response directly
294
- return (
295
  f"**Daily Wellness AI**\n\n"
296
  f"{web_search_response}\n\n"
297
  "Disclaimer: This information is retrieved from the web and is not a substitute for professional medical advice.\n\n"
298
  "Wishing you a calm and wonderful day!"
299
  )
 
 
 
300
 
301
- # Proceed with answer expansion using retrieved_answers
302
  responses = [ans[0] for ans in retrieved]
303
- expanded_answer = answer_expander.expand(query, responses)
 
 
 
304
  return expanded_answer
305
  except Exception as e:
306
  logger.error(f"Error handling query: {e}")
@@ -308,11 +419,14 @@ def handle_query(query: str) -> str:
308
  return "An error occurred while processing your request."
309
 
310
  ###############################################################################
311
- # 10) Gradio Interface
312
  ###############################################################################
313
- def gradio_interface(query: str):
 
 
 
314
  try:
315
- response = handle_query(query)
316
  formatted_response = response # Response is already formatted
317
  return formatted_response
318
  except Exception as e:
@@ -320,28 +434,36 @@ def gradio_interface(query: str):
320
  logger.debug("Exception details:", exc_info=True)
321
  return "**An error occurred while processing your request. Please try again later.**"
322
 
 
323
  interface = gr.Interface(
324
  fn=gradio_interface,
325
- inputs=gr.Textbox(
326
- lines=2,
327
- placeholder="e.g., What is box breathing?",
328
- label="Ask Daily Wellness AI"
329
- ),
 
 
 
 
 
 
 
330
  outputs=gr.Markdown(label="Answer from Daily Wellness AI"),
331
  title="Daily Wellness AI",
332
- description="Ask wellness-related questions and receive synthesized, creative answers.",
333
  theme="default",
334
  examples=[
335
- "What is box breathing and how does it help reduce anxiety?",
336
- "Provide a daily wellness schedule incorporating box breathing techniques.",
337
- "What are some tips for maintaining good posture while working at a desk?",
338
- "Who is the CEO of Hugging Face?" # Example of an out-of-context question
339
  ],
340
  allow_flagging="never"
341
  )
342
 
343
  ###############################################################################
344
- # 11) Launch Gradio
345
  ###############################################################################
346
  if __name__ == "__main__":
347
  try:
 
5
  import chardet
6
  import logging
7
  import gradio as gr
8
+ import json
9
+ import hashlib
10
+ import numpy as np # ADDED for easy array handling
11
  from typing import Optional, List, Tuple, ClassVar, Dict
12
 
13
  from sentence_transformers import SentenceTransformer, util, CrossEncoder
 
240
  def __init__(self, llm: GeminiLLM):
241
  self.llm = llm
242
 
243
+ def expand(self, query: str, retrieved_answers: List[str], detail: bool = False) -> str:
244
+ """
245
+ Synthesize answers into a single cohesive response.
246
+ If detail=True, provide a more detailed response.
247
+ """
248
  try:
249
+ reference_block = "\n".join(
250
+ f"- {idx+1}) {ans}" for idx, ans in enumerate(retrieved_answers, start=1)
251
+ )
252
+
253
+ # ADDED: More elaboration if detail=True
254
+ detail_instructions = (
255
+ "Provide a thorough, in-depth explanation, adding relevant tips and context, "
256
+ "while remaining creative and brand-aligned. "
257
+ if detail else
258
+ "Please provide a concise response in no more than 4 sentences."
259
+ )
260
+
261
  prompt = (
262
  f"You are Daily Wellness AI, a friendly wellness expert. Below are multiple "
263
  f"potential answers retrieved from a local knowledge base. You have a user question.\n\n"
264
  f"Question: {query}\n\n"
265
  f"Retrieved Answers:\n{reference_block}\n\n"
266
+ f"Please synthesize these references into a single cohesive, creative, and brand-aligned response. "
267
+ f"{detail_instructions} "
268
+ f"End with a short inspirational note.\n\n"
269
  "Disclaimer: This is general wellness information, not a substitute for professional medical advice."
270
  )
271
+
272
  logger.debug(f"Generated prompt for answer expansion: {prompt}")
273
  response = self.llm._call(prompt)
274
  logger.debug(f"Expanded answer: {response}")
 
281
  answer_expander = AnswerExpander(llm)
282
 
283
  ###############################################################################
284
+ # 9) Persistent Cache (ADDED)
285
  ###############################################################################
286
+ CACHE_FILE = "query_cache.json"
287
+ SIMILARITY_THRESHOLD_CACHE = 0.8 # Adjust for how close a query must be to reuse cache
288
+
289
def load_cache() -> Dict:
    """Read the persistent query cache from CACHE_FILE, returning {} when the
    file is absent or unreadable (a corrupt cache must never break startup)."""
    if not os.path.isfile(CACHE_FILE):
        return {}
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as exc:
        logger.error(f"Failed to load cache file: {exc}")
        return {}
299
+
300
def save_cache(cache_data: Dict):
    """Persist the cache dictionary to CACHE_FILE as pretty-printed UTF-8 JSON.

    Failures are logged rather than raised so a full disk or permission
    problem cannot take down query handling.
    """
    try:
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(cache_data, f, ensure_ascii=False, indent=2)
    except Exception as exc:
        logger.error(f"Failed to save cache file: {exc}")
307
+
308
def compute_hash(text: str) -> str:
    """Return a stable MD5 hex digest of *text*, used as a duplicate-safe cache key."""
    digest = hashlib.md5(text.encode("utf-8"))
    return digest.hexdigest()
311
+
312
+ # ADDED: Load cache at startup
313
+ cache_store = load_cache()
314
+
315
+ ###############################################################################
316
+ # 9.1) Utility to attempt cached retrieval (ADDED)
317
+ ###############################################################################
318
def get_cached_answer(query: str) -> Optional[str]:
    """
    Return a previously cached answer whose stored query embedding has
    cosine similarity >= SIMILARITY_THRESHOLD_CACHE with *query*,
    or None when no cached entry is close enough.
    """
    if not cache_store:
        return None

    # Embed the incoming query once; every cached entry is compared against it.
    incoming = embedding_model.encode(query, convert_to_tensor=True)

    top_score = 0.0
    top_answer = None
    for entry in cache_store.values():
        stored = np.array(entry["embedding"], dtype=np.float32)
        sim = util.pytorch_cos_sim(incoming, stored)[0].item()
        if sim > top_score:
            top_score = sim
            top_answer = entry["answer"]

    if top_score < SIMILARITY_THRESHOLD_CACHE:
        return None
    logger.info(f"Cache hit! Similarity: {top_score:.2f}, returning cached answer.")
    return top_answer
344
+
345
def store_in_cache(query: str, answer: str):
    """Record a query/answer pair (plus the query's embedding) in the in-memory
    cache, keyed by the query's hash, then persist the cache to disk."""
    # .cpu().tolist() makes the embedding JSON-serializable for the cache file.
    embedding_as_list = embedding_model.encode(query, convert_to_tensor=True).cpu().tolist()
    key = compute_hash(query)
    cache_store[key] = {
        "query": query,
        "answer": answer,
        "embedding": embedding_as_list
    }
    save_cache(cache_store)
357
+
358
+ ###############################################################################
359
+ # 10) Query Handling
360
+ ###############################################################################
361
+ def handle_query(query: str, detail: bool = False) -> str:
362
+ """
363
+ Main function to process the query.
364
+ :param query: The user's question.
365
+ :param detail: Whether the user wants a more detailed response.
366
+ :return: Response string from Daily Wellness AI.
367
+ """
368
  if not query or not isinstance(query, str) or len(query.strip()) == 0:
369
  return "Please provide a valid question."
370
 
371
  try:
372
+ # 1) Check the cache first (ADDED for speed)
373
+ cached_answer = get_cached_answer(query)
374
+ if cached_answer:
375
+ return cached_answer
376
+
377
+ # 2) Sanity Check: Determine if the question is relevant to daily wellness
378
  is_relevant = sanity_checker.is_relevant(query)
379
  if not is_relevant:
380
  return "Your question seems out of context or not related to daily wellness. Please ask a wellness-related question."
381
 
382
+ # 3) Proceed with retrieval
383
  retrieved = retriever.retrieve(query)
384
  if not retrieved:
385
  return "I'm sorry, I couldn't find an answer to your question."
386
 
387
+ # 4) Check similarity threshold
388
  top_score = retrieved[0][1] # Assuming the list is sorted descending
389
  similarity_threshold = 0.3 # Adjust this threshold based on empirical results
390
 
 
396
 
397
  # Optionally, process the web_search_response if needed
398
  # For simplicity, return the web search response directly
399
+ answer = (
400
  f"**Daily Wellness AI**\n\n"
401
  f"{web_search_response}\n\n"
402
  "Disclaimer: This information is retrieved from the web and is not a substitute for professional medical advice.\n\n"
403
  "Wishing you a calm and wonderful day!"
404
  )
405
+ # Store in cache before returning
406
+ store_in_cache(query, answer)
407
+ return answer
408
 
409
+ # 5) Proceed with answer expansion using retrieved_answers
410
  responses = [ans[0] for ans in retrieved]
411
+ expanded_answer = answer_expander.expand(query, responses, detail=detail)
412
+
413
+ # 6) Store in cache (ADDED)
414
+ store_in_cache(query, expanded_answer)
415
  return expanded_answer
416
  except Exception as e:
417
  logger.error(f"Error handling query: {e}")
 
419
  return "An error occurred while processing your request."
420
 
421
  ###############################################################################
422
+ # 11) Gradio Interface
423
  ###############################################################################
424
+ def gradio_interface(query: str, detail: bool):
425
+ """
426
+ Gradio interface function that optionally takes a detail parameter for longer responses.
427
+ """
428
  try:
429
+ response = handle_query(query, detail=detail)
430
  formatted_response = response # Response is already formatted
431
  return formatted_response
432
  except Exception as e:
 
434
  logger.debug("Exception details:", exc_info=True)
435
  return "**An error occurred while processing your request. Please try again later.**"
436
 
437
# Gradio UI: a question textbox plus a checkbox that toggles the in-depth answer mode.
_question_box = gr.Textbox(
    lines=2,
    placeholder="e.g., What is box breathing?",
    label="Ask Daily Wellness AI"
)
_detail_toggle = gr.Checkbox(
    label="In-Depth Answer?",
    value=False,
    info="Check for a longer, more detailed response."
)

interface = gr.Interface(
    fn=gradio_interface,
    inputs=[_question_box, _detail_toggle],
    outputs=gr.Markdown(label="Answer from Daily Wellness AI"),
    title="Daily Wellness AI",
    description="Ask wellness-related questions and receive synthesized, creative answers. Optionally request a more in-depth response.",
    theme="default",
    examples=[
        ["What is box breathing and how does it help reduce anxiety?", True],
        ["Provide a daily wellness schedule incorporating box breathing techniques.", False],
        ["What are some tips for maintaining good posture while working at a desk?", True],
        ["Who is the CEO of Hugging Face?", False]  # Example of an out-of-context question
    ],
    allow_flagging="never"
)
464
 
465
  ###############################################################################
466
+ # 12) Launch Gradio
467
  ###############################################################################
468
  if __name__ == "__main__":
469
  try: