Spaces:

Futuresony
/

Mr.Events

Running

App Files Community

Futuresony committed on Jul 17

Commit

9cd0bee

verified ·

1 Parent(s): 1e6371a

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -728

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
-# This block contains the full combined script for testing.
-# It includes all the code from the previous successful steps.
-# Combined Imports
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -13,80 +12,70 @@ import json
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer, util, CrossEncoder
 import gspread
-# from google.colab import auth
-from google.auth import default
 from tqdm import tqdm
 from duckduckgo_search import DDGS
 import spacy
 from datetime import date, timedelta
-from dateutil.relativedelta import relativedelta # Corrected typo
-import traceback # Import traceback
-import base64 # Import base64
-# Add PyTorch version and CUDA availability check
-print(f"PyTorch version: {torch.__version__}")
-print(f"Is CUDA available: {torch.cuda.is_available()}")
-if torch.cuda.is_available():
-    print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
-# Optional: Add check for torchvision version if installed
-try:
-    import torchvision
-    print(f"Torchvision version: {torchvision.__version__}")
-except ImportError:
-    print("Torchvision not installed.")
 # Suppress warnings
 warnings.filterwarnings("ignore", category=UserWarning)
-# Define global variables and load secrets
 HF_TOKEN = os.getenv("HF_TOKEN")
-SHEET_ID = "19ipxC2vHYhpXCefpxpIkpeYdI43a1Ku2kYwecgUULIw"
 GOOGLE_BASE64_CREDENTIALS = os.getenv("GOOGLE_BASE64_CREDENTIALS")
-# Initialize InferenceClient
-client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
-# Load spacy model for sentence splitting
 nlp = None
 try:
-    nlp = spacy.load("en_core_web_sm")
-    print("SpaCy model 'en_core_web_sm' loaded.")
-except OSError:
-    print("SpaCy model 'en_core_web_sm' not found. Downloading...")
     try:
-        os.system("python -m spacy download en_core_web_sm")
         nlp = spacy.load("en_core_web_sm")
-        print("SpaCy model 'en_core_web_sm' downloaded and loaded.")
-    except Exception as e:
-        print(f"Failed to download or load SpaCy model: {e}")
-# Load SentenceTransformer for RAG/business info retrieval
-embedder = None
-try:
     print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
     embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
     print("Sentence Transformer loaded.")
-except Exception as e:
-     print(f"Error loading Sentence Transformer: {e}")
-# Load a Cross-Encoder model for re-ranking retrieved documents
-reranker = None
-try:
     print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
     reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
     print("Cross-Encoder Reranker loaded.")
 except Exception as e:
-    print(f"Error loading Cross-Encoder Reranker: {e}")
-    print("Please ensure the model identifier 'cross-encoder/ms-marco-MiniLM-L6-v2' is correct and accessible on Hugging Face Hub.")
     print(traceback.format_exc())
-    reranker = None
-# Google Sheets Authentication
 gc = None # Global variable for gspread client
 def authenticate_google_sheets():
     """Authenticates with Google Sheets using base64 encoded credentials."""
@@ -94,6 +83,7 @@ def authenticate_google_sheets():
     print("Authenticating Google Account...")
     if not GOOGLE_BASE64_CREDENTIALS:
         print("Error: GOOGLE_BASE64_CREDENTIALS secret not found.")
         return False
     try:
@@ -111,16 +101,15 @@ def authenticate_google_sheets():
         print(traceback.format_exc())
         return False
-# Google Sheets Data Loading and Embedding
-# business_data = [] # Global variable to store loaded data - This was intended to be global, but needs to be named 'data' to match usage
-data = [] # Global variable to store loaded data - Renamed to 'data'
 descriptions_for_embedding = []
 embeddings = torch.tensor([])
 business_info_available = False # Flag to indicate if business info was loaded successfully
 def load_business_info():
     """Loads business information from Google Sheet and creates embeddings."""
-    global data, descriptions_for_embedding, embeddings, business_info_available # Added 'data' to global
     business_info_available = False # Reset flag
     if gc is None:
@@ -129,6 +118,7 @@ def load_business_info():
     if not SHEET_ID:
          print("Error: SHEET_ID not set.")
          return
     try:
@@ -138,21 +128,19 @@ def load_business_info():
         if not data_records:
             print(f"Warning: No data records found in Google Sheet with ID: {SHEET_ID}")
-            data = [] # Use the global 'data'
             descriptions_for_embedding = []
         else:
             # Filter out rows missing 'Service' or 'Description'
             filtered_data = [row for row in data_records if row.get('Service') and row.get('Description')]
             if not filtered_data:
                 print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
-                data = [] # Use the global 'data'
                 descriptions_for_embedding = []
             else:
-                data = filtered_data # Assign to the global 'data'
-                # Use BOTH Service and Description for embedding
                 descriptions_for_embedding = [f"Service: {row['Service']}. Description: {row['Description']}" for row in data]
-                # Only encode if descriptions_for_embedding are found and embedder is available
                 if descriptions_for_embedding and embedder is not None:
                     print("Encoding descriptions...")
                     try:
@@ -161,12 +149,12 @@ def load_business_info():
                         business_info_available = True # Set flag if successful
                     except Exception as e:
                         print(f"Error during description encoding: {e}")
-                        embeddings = torch.tensor([]) # Ensure embeddings is an empty tensor on error
                         business_info_available = False # Encoding failed
                 else:
                     print("Skipping encoding descriptions: No descriptions found or embedder not available.")
-                    embeddings = torch.tensor([]) # Ensure embeddings is an empty tensor
-                    business_info_available = False # Cannot use RAG without descriptions or embedder
         print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
         if not business_info_available:
@@ -175,88 +163,45 @@ def load_business_info():
     except gspread.exceptions.SpreadsheetNotFound:
         print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
         print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
-        business_info_available = False # Sheet not found
     except Exception as e:
         print(f"An error occurred while accessing the Google Sheet: {e}")
         print(traceback.format_exc())
-        business_info_available = False # Other sheet access error
-# Business Info Retrieval (RAG)
-def retrieve_business_info(query: str, threshold: float = 0.50, max_matches: int = 5) -> tuple[list, float]:
     """
     Retrieves relevant business information from loaded data based on a query.
-    Args:
-        query: The user's query string.
-        threshold: Minimum relevance score for a match.
-        max_matches: The maximum number of top relevant entries to retrieve *before* thresholding.
-    Returns:
-        A tuple containing:
-        - A list of dictionaries, where each dictionary is a relevant row from the
-          Google Sheet data that meets the threshold.
-        - The score of the best match found (even if below threshold).
-        Returns an empty list and score 0.0 if RAG is not available or
-        no relevant information is found.
     """
     global data
     if not business_info_available or embedder is None or not descriptions_for_embedding or not data:
         print("Business information retrieval is not available or data is empty.")
-        return [], 0.0
     try:
         query_embedding = embedder.encode(query, convert_to_tensor=True)
         cosine_scores = util.cos_sim(query_embedding, embeddings)[0]
-        # Get top N results *before* thresholding to allow re-ranking on potentially relevant items
-        top_n_indices_pre_rerank = torch.topk(cosine_scores, k=min(max_matches * 2, len(data)))[1].tolist() # Get more for better re-ranking pool
-        top_n_pre_rerank_results = [data[i] for i in top_n_indices_pre_rerank]
-        descriptions_pre_rerank = [descriptions_for_embedding[i] for i in top_n_indices_pre_rerank]
-        best_score_overall = torch.max(cosine_scores).item() if len(cosine_scores) > 0 else 0.0
-        # Optional: Re-rank the top results using the Cross-Encoder
-        reranked_results_with_scores = []
-        if reranker is not None and top_n_pre_rerank_results:
-            print("Re-ranking top results before thresholding...")
-            rerank_pairs = [(query, desc) for desc in descriptions_pre_rerank]
             rerank_scores = reranker.predict(rerank_pairs)
-            # Pair original results with rerank scores and sort
-            paired_results_with_scores = list(zip(top_n_pre_rerank_results, rerank_scores))
-            reranked_paired_results = sorted(paired_results_with_scores, key=lambda item: item[1], reverse=True)
             print("Re-ranking complete.")
-            # Apply threshold and max_matches *after* re-ranking
-            filtered_results = []
-            for item, score in reranked_paired_results:
-                 if score >= threshold:
-                      filtered_results.append(item)
-                      if len(filtered_results) >= max_matches: # Apply max_matches here
-                           break
-            return filtered_results, best_score_overall
         else:
-            # If reranker is not available, apply threshold and max_matches directly to cosine scores
-            print("Reranker not available or no results to rerank. Applying threshold and max_matches to cosine scores.")
-            filtered_results = []
-            sorted_cosine_scores, sorted_indices = torch.sort(cosine_scores, descending=True)
-            for i in range(min(max_matches, len(data))): # Take top N based on cosine
-                 if sorted_cosine_scores[i].item() >= threshold: # Check threshold
-                       filtered_results.append(data[sorted_indices[i].item()])
-                 else:
-                      break # Stop if score drops below threshold
-            return filtered_results, best_score_overall
     except Exception as e:
         print(f"Error during business information retrieval: {e}")
         print(traceback.format_exc())
-        return [], 0.0
 # Function to perform DuckDuckGo Search and return results with URLs
 def perform_duckduckgo_search(query: str, max_results: int = 5):
     """
@@ -267,29 +212,21 @@ def perform_duckduckgo_search(query: str, max_results: int = 5):
     print(f"Executing Tool: perform_duckduckgo_search with query='{query}')")
     search_results_list = []
     try:
-        # Add a delay before each search
         time.sleep(1) # Sleep for 1 second
         with DDGS() as ddgs:
             if not query or len(query.split()) < 2:
                  print(f"Skipping search for short query: '{query}'")
                  return []
-            # Use text() method for general text search
             results_generator = ddgs.text(query, max_results=max_results)
             results_found = False
             for r in results_generator:
                 search_results_list.append(r)
                 results_found = True
             if not results_found and max_results > 0:
                  print(f"DuckDuckGo search for '{query}' returned no results.")
     except Exception as e:
         print(f"Error during Duckduckgo search for '{query}': {e}")
         return []
     return search_results_list
 # Function to perform date calculation if needed
@@ -297,8 +234,6 @@ def perform_date_calculation(query: str):
     """
     Analyzes query for date calculation requests and performs the calculation.
     Returns a dict describing the calculation and result, or None.
-    Handles formats like 'X days ago', 'X days from now', 'X weeks ago', 'X weeks from now', 'what is today's date'.
-    Uses dateutil for slightly more flexibility (though core logic remains simple).
     """
     print(f"Executing Tool: perform_date_calculation with query='{query}')")
     query_lower = query.lower()
@@ -357,9 +292,7 @@ def perform_date_calculation(query: str):
     return {"query": query, "description": desc, "result": None, "success": False}
-# --- Tool Definitions for the Model ---
-# Describe the tools available to the model in a structured format
-# This will be injected into the prompt.
 TOOL_DEFINITIONS = """
 Available tools:
 1.  **search**: Use this tool to perform a web search for current external information. Useful for facts, news, weather, etc.
@@ -383,10 +316,6 @@ Available tools:
 """
-# --- System Prompt Template for Tool Use ---
-# This template instructs the model on how to use the tools and format its output.
-# Inject this *within* the user message content.
-# MODIFIED to ask for COMPREHENSIVE answers
 tool_use_system_template = """<system>
 You are FutureAi, a helpful, polite, and professional assistant for Futuresony. Your primary goal is to assist the user by effectively using the available tools or answering directly based on the conversation history and tool outputs. Maintain a positive and helpful tone. If you are unsure or a tool returns no clear results, state this gracefully. When providing answers based on gathered information, aim for a comprehensive and detailed response, synthesizing all relevant points from the tool outputs.
@@ -427,616 +356,95 @@ Output ONLY tool calls within <tool_code> tags or a final answer using the 'answ
 MAX_HISTORY_TURNS = 5 # Keep last 5 turns
-# --- Chat Logic Function with Tool Use ---
-# Modified to be a generator function
-def chat_with_bot(user_input, chat_history_state, system_message, max_new_tokens, temperature, top_p): # Added parameters from Gradio interface
-    """
-    Processes user input through an iterative tool-use logic for Gradio interface.
-    Takes user_input string and chat_history_state (list of lists) as input.
-    Yields the updated chat_history_state (list of lists) at each step.
-    Uses a structured tool-calling approach.
-    Guaranteed strict user/assistant role alternation in model_chat_history.
-    """
-    # Basic Input Safety Check (Example)
-    if any(phrase in user_input.lower() for phrase in ["harmful content", "malicious intent"]):
-        safe_response = "I cannot process requests that involve harmful or inappropriate content."
-        yield chat_history_state + [[user_input, safe_response]] # Yield immediately for display
-        return # Exit the generator
-    # Append user message to history immediately for display
-    # The bot message will be updated iteratively
-    # We append a placeholder now, and update it with the final response later.
-    # Ensure the initial yield is a list of lists for Gradio, even if it's just one turn
-    initial_history = chat_history_state + [[user_input, "..."]]
-    yield initial_history # Yield state with placeholder
-    original_user_input = user_input
-    print(f"\n--- Starting turn with input: {user_input} ---") # Debug Print
-    # Get current date
-    current_date = date.today().strftime('%Y-%m-%d')
-    print(f"Current Date: {current_date}") # Debug Print
-    # Initialize unique_urls here to avoid UnboundLocalError
-    unique_urls = set() # Collect URLs for Sources section
-    # Maintain an internal model history that strictly alternates user/assistant roles
-    # This history will be used directly by apply_chat_template.
-    # It represents the conversation *as the model sees it*, including tool calls/results.
-    # Build this history from the *completed* past turns from chat_history_state.
-    model_chat_history = []
-    # Convert Gradio chat history (list of lists) to model history (list of dicts)
-    # Ensure strict alternation: user, assistant, user, assistant...
-    # Only add complete turns from the *past* history (exclude the current incomplete turn)
-    # Limit the history length
-    history_to_process = chat_history_state # Use the full history passed to the function initially
-    # Ensure we only take pairs [user, bot] from past history where bot is NOT the initial placeholder
-    # This guarantees that the last message in `recent_complete_turns` corresponds to a *completed* assistant response.
-    complete_past_turns = [
-        turn for turn in history_to_process
-        if turn is not None and len(turn) == 2 and turn[0] is not None and turn[1] is not None and str(turn[1]).strip() != "..."
-    ]
-    # Take the last MAX_HISTORY_TURNS complete turns
-    recent_complete_turns = complete_past_turns[max(0, len(complete_past_turns) - MAX_HISTORY_TURNS):]
-    for user_msg, bot_msg in recent_complete_turns:
-         # Add user message (must be present)
-         if user_msg is not None: # Should always be True based on complete_past_turns filter
-              model_chat_history.append({"role": "user", "content": str(user_msg).strip()})
-         # Add assistant message (must be present and non-placeholder based on complete_past_turns filter)
-         if bot_msg is not None and str(bot_msg).strip() != "...": # Should always be True based on filter
-              model_chat_history.append({"role": "assistant", "content": str(bot_msg).strip()})
-    # Initialize variables for the tool-use loop
-    max_tool_iterations = 5 # Limit the number of tool calls in a single turn to prevent infinite loops
-    final_response_text = None # Variable to hold the final answer from the 'answer' tool
-    current_tool_results_text = "" # Accumulate tool results text for the *next* model call in this turn
-    print("Starting tool execution loop...")
-    try: # This is the main try block for the chat_with_bot function
-        for i in range(max_tool_iterations):
-            print(f"\n--- Tool Iteration {i+1} ---")
-            # Step 1 & 2: Prepare the user message content for THIS iteration and append to history
-            # The content of the user message for this iteration depends on whether it's the first step
-            # (original query + system prompt) or a subsequent step (tool results).
-            current_user_message_content = ""
-            if i == 0:
-                # First iteration: Include the system template and the original user input
-                system_prompt_content = tool_use_system_template.format(
-                    current_date=current_date,
-                    tool_definitions=TOOL_DEFINITIONS
-                )
-                current_user_message_content = system_prompt_content + "\n\nUser Query: " + original_user_input
-            else:
-                 # Subsequent iterations: Include the tool results from the previous assistant response.
-                 if current_tool_results_text:
-                     current_user_message_content = "<tool_results>\n" + current_tool_results_text.strip() + "\n</tool_results>"
-                     current_tool_results_text = "" # Clear the buffer after adding to the prompt
-                 else:
-                     # If no new tool results were accumulated in the previous step (e.g., parsing failed, no tools called),
-                     # send a message indicating this so the model doesn't wait indefinitely.
-                     current_user_message_content = "<tool_results>No new results or no tools were called in the previous turn.</tool_results>"
-                     print("No new tool results to add for this iteration.")
-            # Append the user message for the current iteration to the main model history.
-            # This history is what apply_chat_template will process.
-            # If the logic is correct, model_chat_history should always end with an 'assistant' role
-            # before this append, except for the very first turn of the conversation.
-            model_chat_history.append({"role": "user", "content": current_user_message_content.strip()})
-            # Step 3 & 4: Apply template to get the full prompt and Generate model output
-            # The history `model_chat_history` should now be in the correct state for generation:
-            # starting with 'user' and ending with the current 'user' message.
-            # The check below verifies the strict alternation before tokenization.
-            if len(model_chat_history) > 1 and model_chat_history[-1]['role'] == model_chat_history[-2]['role']:
-                 print("Error: History roles are not alternating before generation!")
-                 print("History:", model_chat_history)
-                 final_response_text = "Sorry, I encountered an internal error with the conversation history format before generation."
-                 break # Break the tool loop if history is malformed
-            prompt_for_generation = client.chat_completion(
-                messages=model_chat_history, # Use the main model_chat_history directly
-                max_tokens=max_new_tokens, # Use max_new_tokens parameter
-                stream=False, # Use non-streaming for tool parsing loop
-                temperature=temperature, # Use temperature parameter
-                top_p=top_p, # Use top_p parameter
-                # Add tool_choice="auto" or specific tool names if the API supports it
-                # For Gemma-2-IT, we rely on prompt engineering for tool calls.
-            )
-            raw_model_output = ""
-            if prompt_for_generation and prompt_for_generation.choices:
-                 raw_model_output = prompt_for_generation.choices[0].message.content.strip()
-            else:
-                 print("Model returned an empty response or no choices.")
-                 raw_model_output = "<system_error>Error: Model returned empty response.</system_error>" # Report error via system tag
-            print(f"Raw model output: {raw_model_output}")
-            # Step 5: Append the model's raw output as the assistant message for THIS iteration
-            # This is crucial for maintaining the alternation in `model_chat_history`
-            model_chat_history.append({"role": "assistant", "content": raw_model_output.strip()})
-            # Step 6: Parse Tool Calls from the latest assistant message (which is now the last entry in history)
-            tool_calls = []
-            # Use regex to find all content within <tool_code> tags in the latest assistant message
-            matches = re.findall(r'<tool_code>(.*?)</tool_code>', model_chat_history[-1]['content'], re.DOTALL)
-            if not matches:
-                print("No tool calls found in latest model output.")
-                # If no tool calls, check if the model tried to output an answer directly
-                # This is a fallback if the model fails to use the 'answer' tool.
-                # Apply cleanup patterns just to the latest assistant message to see if it's a potential answer
-                cleaned_potential_answer = re.sub(r'<tool_code>.*?</tool_code>', '', model_chat_history[-1]['content'], flags=re.DOTALL) # Remove tool tags first
-                cleaned_potential_answer = re.sub(r'<.*?>', '', cleaned_potential_answer).strip() # Remove any other potential tags
-                # If the cleaned output is not empty or just whitespace, treat it as a potential final answer
-                if cleaned_potential_answer and final_response_text is None:
-                    print("Model output does not contain tool calls, treating cleaned output as potential direct answer.")
-                    final_response_text = cleaned_potential_answer
-                    break # Exit the tool loop as we have a response
-                # If no tool calls and not a potential answer, check for explicit system errors reported by the model
-                if "<system_error>" in model_chat_history[-1]['content'] or "<error>" in model_chat_history[-1]['content']:
-                     print("Model output contains system error tags. Exiting tool loop.")
-                     # The synthesis step will pick up these errors from the history
-                     break # Exit loop on critical error reported by the model
-                # If no tool calls and not a potential answer, and no explicit error, the loop might continue.
-                # The next iteration's user message content will be generated as "No new results..."
-                continue # Skip to the next iteration
-            # Step 7: Execute Tool Calls and accumulate results for the *next* iteration's user message
-            # We clear the buffer here, as we are processing the *latest* assistant message's tools.
-            current_tool_results_text = ""
-            answer_tool_called_in_this_iter = False # Reset flag for this iteration's output
-            for match in matches:
-                try:
-                    # Attempt to parse the content within the tags as JSON
-                    tool_call_json = json.loads(match.strip())
-                    if "tool_name" in tool_call_json and "parameters" in tool_call_json:
-                        tool_name = tool_call_json.get("tool_name")
-                        parameters = tool_call_json.get("parameters", {})
-                        if tool_name == "answer":
-                            final_response_text = parameters.get("text", "")
-                            answer_tool_called_in_this_iter = True
-                            print(f"Model called 'answer' tool. Final response intended: '{final_response_text}'")
-                            # Once the 'answer' tool is called, we prioritize exiting the loop after this iteration.
-                            # We still process any other tool calls in this *same* model output, but then break the loop afterwards.
-                            continue # Process next tool call in the same output (from the same model output)
-                        elif tool_name == "search":
-                            query = parameters.get("query")
-                            max_results = parameters.get("max_results", 5)
-                            if query:
-                                print(f"Executing Tool: search with query='{query}', max_results={max_results}")
-                                results = perform_duckduckgo_search(query, max_results)
-                                current_tool_results_text += f"<search_results_for_query query='{query}'>\n"
-                                if results:
-                                    for r in results:
-                                         snippet = r.get('body', 'N/A')
-                                         if len(snippet) > 300:
-                                             snippet = snippet[:300] + "..."
-                                         current_tool_results_text += f"<item>\n<title>{r.get('title', 'N/A')}</title>\n<snippet>{snippet}</snippet>\n<url>{r.get('href', 'N/A')}</url>\n</item>\n"
-                                    print(f"Executed search for '{query}'. Found {len(results)} results.")
-                                else:
-                                     current_tool_results_text += "No results found.\n"
-                                     print(f"No search results found for '{query}'.")
-                                current_tool_results_text += "</search_results_for_query>\n"
-                            else:
-                                current_tool_results_text += f"<search_results_for_query query='{query}'><error>Missing 'query' parameter.</error></search_results_for_query>\n"
-                                print(f"Skipping search tool call: Missing 'query' parameter.")
-                        elif tool_name == "lookup_business_info":
-                             query = parameters.get("query")
-                             # Use the threshold and max_matches provided by the model, or the defaults
-                             threshold = parameters.get("threshold", 0.50)
-                             max_matches = parameters.get("max_matches", 5) # Use max_matches parameter
-                             if query:
-                                  print(f"Executing Tool: lookup_business_info with query='{query}', threshold={threshold:.4f}, max_matches={max_matches}")
-                                  # retrieve_business_info now returns a LIST of matches and the best score
-                                  matches_list, best_score = retrieve_business_info(query, threshold=threshold, max_matches=max_matches)
-                                  # MODIFIED: Format results block to contain MULTIPLE match tags
-                                  current_tool_results_text += f"<lookup_business_info_results_for_query query='{query}' requested_threshold='{threshold:.4f}' requested_max_matches='{max_matches}' final_best_score='{best_score:.4f}'>\n"
-                                  if matches_list: # Check if the list is not empty
-                                       for match in matches_list: # Iterate through the list of matches
-                                            if isinstance(match, dict): # Ensure it's a dictionary
-                                                current_tool_results_text += f"<match>\n"
-                                                current_tool_results_text += f"<service>{match.get('Service', 'N/A')}</service>\n"
-                                                current_tool_results_text += f"<description>{match.get('Description', 'N/A')}</description>\n"
-                                                current_tool_results_text += f"<price>{match.get('Price', 'N/A')}</price>\n"
-                                                current_tool_results_text += f"<available>{match.get('Available', 'N/A')}</available>\n"
-                                                # Add other relevant fields from your sheet here if needed for synthesis
-                                                # e.g., <contact_person> etc.
-                                                current_tool_results_text += f"</match>\n"
-                                           # Optionally add a note if any item in the list was not a dict
-                                            else:
-                                                print(f"Warning: Item in retrieved_business_info list was not a dict: {match}")
-                                       print(f"Executed business lookup for '{query}'. Found {len(matches_list)} matches above threshold {threshold:.4f}. Best score: {best_score:.4f}.")
-                                  else:
-                                       # This case covers No matches found above threshold within retrieve_business_info
-                                       current_tool_results_text += f"No relevant matches found above threshold {threshold:.4f} (best score: {best_score:.4f}).\n"
-                                       print(f"Executed business lookup for '{query}'. No matches found above threshold.")
-                                       # Add a note about the best score being below threshold
-                                       if best_score > 0: # Only add note if *some* match was found, but not above threshold
-                                            current_tool_results_text += f"<system_note>Best match score ({best_score:.4f}) was below the requested threshold ({threshold:.4f}).</system_note>\n"
-                                  current_tool_results_text += "</lookup_business_info_results_for_query>\n"
-                             else:
-                                  current_tool_results_text += f"<lookup_business_info_results_for_query query='{query}'><error>Missing 'query' parameter.</error></lookup_business_info_results_for_query>\n"
-                                  print(f"Skipping business lookup tool call: Missing 'query' parameter.")
-                        elif tool_name == "perform_date_calculation":
-                             query = parameters.get("query")
-                             if query:
-                                  print(f"Executing Tool: perform_date_calculation with query='{query}'")
-                                  result = perform_date_calculation(query) # This function already returns a dict or error
-                                  current_tool_results_text += f"<perform_date_calculation_results_for_query query='{query}'>\n"
-                                  if result and result.get('success'): # Check the 'success' key
-                                       current_tool_results_text += f"<description>{result.get('description', 'Calculation Successful')}</description>\n<date>{result.get('result')}</date>\n"
-                                       print(f"Executed date calculation for '{query}'. Result: {result.get('result')}.")
-                                  elif result and result.get('description'):
-                                       current_tool_results_text += f"<description>{result.get('description')}</description>\n" # Report description if result is None or not success
-                                       print(f"Executed date calculation for '{query}'. Failed: {result.get('description')}.")
-                                  elif isinstance(result, str) and result.startswith("Error"):
-                                       current_tool_results_text += f"<error>{result}</error>\n" # Report error string
-                                       print(f"Executed date calculation for '{query}'. Error: {result}.")
-                                  else: # Generic failure case
-                                       current_tool_results_text += "Calculation failed or no specific date recognized.\n"
-                                       print(f"Executed date calculation for '{query}'. No specific result.")
-                                  current_tool_results_text += "</perform_date_calculation_results_for_query>\n"
-                             else:
-                                  current_tool_results_text += f"<perform_date_calculation_results_for_query query='{query}'><error>Missing 'query' parameter.</error></lookup_business_info_results_for_query>\n"
-                                  print(f"Skipping date calculation tool call: Missing 'query' parameter.")
-                        else:
-                            print(f"Unknown tool requested by model: {tool_name}")
-                            # Add a note to results buffer about the unknown tool
-                            current_tool_results_text += f"<system_note>Unknown tool requested: {tool_name}</system_note>\n"
-                    else:
-                        print(f"Parsed JSON missing 'tool_name' or 'parameters': {tool_call_json}")
-                        current_tool_results_text += f"<system_note>Failed to parse tool call: Missing 'tool_name' or 'parameters' in JSON: {match.strip()}</system_note>\n"
-                except json.JSONDecodeError as e:
-                    print(f"Failed to parse tool call JSON: {e}")
-                    print(f"Content was: {match.strip()}")
-                    current_tool_results_text += f"<system_note>Failed to parse tool call JSON: {e}. Content: {match.strip()}</system_note>\n"
-                except Exception as e:
-                     print(f"An unexpected error occurred during tool execution for call '{tool_call_json}': {e}") # Changed tool_call_json_str to tool_call_json
-                     print(traceback.format_exc()) # Print traceback for tool execution errors
-                     current_tool_results_text += f"<system_note>An unexpected error occurred during tool call processing: {e}. Content: {match.strip()}</system_note>\n"
-            # Step 8: Check if the 'answer' tool was called in this iteration
-            if answer_tool_called_in_this_iter:
-                print("Answer tool called. Exiting tool loop.")
-                break # Exit the main tool iteration loop
-            # Step 9: If max iterations reached and 'answer' tool wasn't called
-            if i == max_tool_iterations - 1 and final_response_text is None:
-                print(f"Max tool iterations reached ({max_tool_iterations}) without 'answer' call.")
-                # Add a final note to the results buffer so the model sees it in the last forced synthesis step
-                current_tool_results_text += "<system_note>Maximum tool calls reached. Please provide a final answer based on the information gathered so far or state that the request cannot be fully fulfilled.</system_note>\n"
-                # Fall through to the final response generation step below
-    # --- End of the main try block for chat_with_bot ---
-    # THIS EXCEPT BLOCK NEEDS TO BE AT THE SAME INDENTATION LEVEL AS THE 'try' ABOVE
-    except Exception as e: # This except matches the 'try' block at the beginning of the function
-         print(f"An unexpected error occurred in the chat_with_bot function: {e}")
-         print(traceback.format_exc()) # Print full traceback for debugging
-         final_response_text = f"Sorry, I encountered an unexpected error while processing your request: {e}"
-         # In case of error, ensure final_response_text is set so we proceed to update history
-    # The code below runs AFTER the tool iteration loop and its enclosing try/except finishes
-    # --- Final Response Generation (Synthesis) ---
-    # This step is either using the text from the 'answer' tool call,
-    # or generating a fallback response if the model failed to call 'answer'.
-    print("\n--- Final Response Generation ---")
-    # If the model successfully called the 'answer' tool, use that text.
-    # Otherwise, construct a synthesis prompt for the model to generate a final answer.
-    if final_response_text is None:
-        print("Model did not call 'answer' tool. Falling back to synthesis prompt.")
-        # Model failed to call the 'answer' tool within iterations or encountered an error.
-        # Fallback: Generate a response based on the accumulated history and tool results.
-        # The history `model_chat_history` now contains the full trace of tool calls
-        # and the user messages containing the tool results.
-        # Construct the synthesis prompt content.
-        # MODIFIED Synthesis Prompt to emphasize comprehensive answer
-        synthesis_prompt_content = """<system>
-Please provide a final, comprehensive answer to the user's original query based on ALL the information gathered from the executed tools and the conversation history. Synthesize the information into a coherent, natural language response. Pay special attention to providing detailed descriptions and listing all relevant points found from the business lookup tool when multiple items were retrieved.
-User's original query: "{original_user_input}"
-Information gathered from tools and process notes:
-{gathered_info_summary}
-Synthesize ALL relevant information into a clear, concise, and **comprehensive** natural language response for the user. When presenting information from multiple business lookup results, structure your answer to clearly describe each item found (e.g., list them, describe each one fully).
-**Guidelines for your response:**
-- Address the user's original question directly.
-- Use the information provided in the 'Information gathered' section, synthesizing details from all relevant results.
-- If the business lookup returned multiple matches, present the information for *each* match found clearly and informatively.
-- If a tool was executed but returned no relevant results (especially if the best score was below the threshold), or if there were errors (<system_error>, <error>, <system_note> tags), explain this gracefully to the user.
-- Maintain a helpful, polite, and professional business tone, reflecting the Futuresony brand and your identity as FutureAi.
-- Do NOT include raw tool call or result tags in your final answer.
-- If you were unable to gather necessary information, clearly state what you could and could not find.
-After your answer, generate 2-3 concise follow-up questions that might be helpful or relevant to the user based on the conversation and your response. List these questions clearly at the end.
-If Search Results were used, list the relevant URLs under a "Sources:" heading at the very end.
-</system>
-"""
-        # Summarize the gathered information by processing the model_chat_history
-        gathered_info_summary = ""
-        # unique_urls = set() # Moved initialization outside the if block # Commented out as initialization is now at the beginning of the function
-        # Iterate through the model history to find user messages that followed an assistant message
-        # These 'user' messages should contain the tool results block if tools were run.
-        # We iterate up to the second-to-last message, as the *very* last message in history
-        # will be the synthesis prompt itself, which hasn't been processed yet.
-        for i in range(1, len(model_chat_history)):
-             # Look for 'user' messages that follow an 'assistant' message
-             if model_chat_history[i]['role'] == 'user' and isinstance(model_chat_history[i]['content'], str) and '<tool_results>' in model_chat_history[i]['content']:
-                   msg_content = model_chat_history[i]['content']
-                   # Check if it contains the tool results block
-                   tool_results_block = re.search(r'<tool_results>(.*?)</tool_results>', msg_content, re.DOTALL)
-                   if tool_results_block:
-                        content = tool_results_block.group(1) # Content inside <tool_results>
-                        # --- Extract and format info from tool result blocks ---
-                        search_blocks = re.findall(r'<search_results_for_query.*?>(.*?)</search_results_for_query>', content, re.DOTALL)
-                        for sr_content in search_blocks:
-                             query_match = re.search(r"query='(.*?)'", sr_content) # Extract query attribute
-                             query = query_match.group(1) if query_match else "Unknown"
-                             gathered_info_summary += f"Search results for '{query}':\n"
-                             items = re.findall(r'<item>(.*?)</item>', sr_content, re.DOTALL)
-                             if items:
-                                  for item_content in items:
-                                       title = re.search(r'<title>(.*?)</title>', item_content, re.DOTALL)
-                                       snippet = re.search(r'<snippet>(.*?)</snippet>', item_content, re.DOTALL)
-                                       url = re.search(r'<url>(.*?)</url>', item_content, re.DOTALL)
-                                       title_text = title.group(1).strip() if title else 'N/A'
-                                       snippet_text = snippet.group(1).strip() if snippet else 'N/A'
-                                       url_text = url.group(1).strip() if url else 'N/A'
-                                       gathered_info_summary += f"- Title: {title_text}, Snippet: {snippet_text}\n"
-                                       if url_text and url_text != 'N/A':
-                                            unique_urls.add(url_text) # Add URL to set
-                             elif "No results found" in sr_content:
-                                  gathered_info_summary += "- No results found.\n"
-                             elif "<error>" in sr_content:
-                                  error_text = re.search(r'<error>(.*?)</error>', sr_content, re.DOTALL)
-                                  gathered_info_summary += f"- Error during search: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
-                        # Business lookup results (MODIFIED to handle MULTIPLE match tags)
-                        lookup_blocks = re.findall(r'<lookup_business_info_results_for_query.*?>(.*?)</lookup_business_info_results_for_query>', content, re.DOTALL)
-                        for lr_content in lookup_blocks:
-                             query_match = re.search(r"query='(.*?)'", lr_content)
-                             query = query_match.group(1) if query_match else "Unknown"
-                             # Extract requested_threshold, requested_max_matches, final_best_score
-                             req_thresh_match = re.search(r"requested_threshold='(.*?)'", lr_content)
-                             req_thresh = float(req_thresh_match.group(1)) if req_thresh_match else 0.50
-                             req_max_matches_match = re.search(r"requested_max_matches='(.*?)'", lr_content)
-                             req_max_matches = int(req_max_matches_match.group(1)) if req_max_matches_match else 5
-                             final_best_score_match = re.search(r"final_best_score='(.*?)'", lr_content)
-                             final_best_score = float(final_best_score_match.group(1)) if final_best_score_match else 0.0
-                             gathered_info_summary += f"Business lookup results for '{query}' (Requested Threshold: {req_thresh:.4f}, Requested Max Matches: {req_max_matches}, Final Best Score: {final_best_score:.4f}):\n"
-                             matches_found = re.findall(r'<match>(.*?)</match>', lr_content, re.DOTALL) # Find ALL match tags
-                             if matches_found:
-                                  gathered_info_summary += f"  Found {len(matches_found)} relevant item(s):\n"
-                                  for match_content in matches_found: # Iterate through each match
-                                       service = re.search(r'<service>(.*?)</service>', match_content, re.DOTALL)
-                                       description = re.search(r'<description>(.*?)</description>', match_content, re.DOTALL)
-                                       price = re.search(r'<price>(.*?)</price>', match_content, re.DOTALL)
-                                       available = re.search(r'<available>(.*?)</available>', match_content, re.DOTALL)
-                                       # Add extraction for other fields if you include them in your tool output
-                                       # contact_person = re.search(r'<contact_person>(.*?)</contact_person>', match_content, re.DOTALL)
-                                       gathered_info_summary += f"  - Service: {service.group(1).strip() if service else 'N/A'}\n"
-                                       gathered_info_summary += f"    Description: {description.group(1).strip() if description else 'N/A'}\n"
-                                       gathered_info_summary += f"    Price: {price.group(1).strip() if price else 'N/A'}\n"
-                                       gathered_info_summary += f"    Available: {available.group(1).strip() if available else 'N/A'}\n"
-                                       # Add other fields here...
-                                       # if contact_person: gathered_info_summary += f"    Contact Person: {contact_person.group(1).strip()}\n"
-                             elif "No relevant matches found" in lr_content:
-                                  score_match = re.search(r"final_best_score='(.*?)'", lr_content) # Look for final_best_score
-                                  score = float(score_match.group(1)) if score_match else 0.0
-                                  threshold_match = re.search(r"requested_threshold='(.*?)'", lr_content)
-                                  threshold_val = float(threshold_match.group(1)) if threshold_match else 0.50
-                                  gathered_info_summary += f"  No relevant matches found above threshold {threshold_val:.4f} (best score: {score:.4f}).\n"
-                             elif "<error>" in lr_content:
-                                  error_text = re.search(r'<error>(.*?)</error>', lr_content, re.DOTALL)
-                                  gathered_info_summary += f"  Error during business lookup: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
-                            # Include system notes found within the business lookup results block
-                             system_notes_in_lookup = re.findall(r'<system_note>(.*?)</system_note>', lr_content, re.DOTALL)
-                             for note in system_notes_in_lookup:
-                                gathered_info_summary += f"  System Note within Lookup: {note.strip()}\n"
-                        # Date calculation results
-                        date_blocks = re.findall(r'<perform_date_calculation_results_for_query.*?>(.*?)</perform_date_calculation_results_for_query>', content, re.DOTALL)
-                        for dr_content in date_blocks:
-                             query_match = re.search(r"query='(.*?)'", dr_content)
-                             query = query_match.group(1) if query_match else "Unknown"
-                             gathered_info_summary += f"Date calculation results for '{query}':\n"
-                             date_val = re.search(r'<date>(.*?)</date>', dr_content, re.DOTALL)
-                             desc = re.search(r'<description>(.*?)</description>', dr_content, re.DOTALL)
-                             if date_val:
-                                  gathered_info_summary += f"- Result: {date_val.group(1).strip()}\n"
-                                  if desc: gathered_info_summary += f"  Description: {desc.group(1).strip()}\n"
-                             elif desc:
-                                  gathered_info_summary += f"- {desc.group(1).strip()}\n"
-                             elif "<error>" in dr_content:
-                                  error_text = re.search(r'<error>(.*?)</error>', dr_content, re.DOTALL)
-                                  gathered_info_summary += f"- Error during date calculation: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
-                             else:
-                                  gathered_info_summary += "- No specific date result found.\n"
-                        # System Notes/Errors from Tool Execution (outside of specific tool blocks but within <tool_results>)
-                        system_notes_in_results_block = re.findall(r'<system_note>(.*?)</system_note>', content, re.DOTALL)
-                        for note in system_notes_in_results_block:
-                             # Add only if not already added from within a specific lookup block
-                            if f"System Note: {note.strip()}\n" not in gathered_info_summary and f"System Note within Lookup: {note.strip()}\n" not in gathered_info_summary:
-                                gathered_info_summary += f"System Note from Tool Results: {note.strip()}\n"
-                        system_errors_in_results_block = re.findall(r'<system_error>(.*?)</system_error>', content, re.DOTALL)
-                        for error_note in system_errors_in_results_block:
-                             gathered_info_summary += f"System Error from Tool Results: {error_note.strip()}\n"
-        # Also check the raw model output (last assistant message) for system errors if tool results block wasn't generated
-        last_assistant_message_content = model_chat_history[-1]['content'] if model_chat_history and model_chat_history[-1]['role'] == 'assistant' else ""
-        system_errors_in_raw_output = re.findall(r'<system_error>(.*?)</system_error>', last_assistant_message_content, re.DOTALL)
-        for error_note in system_errors_in_raw_output:
-             # Add only if not already captured from within tool results block
-             if f"System Error from Tool Results: {error_note.strip()}" not in gathered_info_summary:
-                   gathered_info_summary += f"System Error in model output: {error_note.strip()}\n"
-        # Check for system notes/errors that might be outside <tool_results> but in the raw assistant output
-        system_notes_in_raw_output = re.findall(r'<system_note>(.*?)</system_note>', last_assistant_message_content, re.DOTALL)
-        for note in system_notes_in_raw_output:
-             if f"System Note from Tool Results: {note.strip()}" not in gathered_info_summary and f"Business Lookup Note: {note.strip()}\n" not in gathered_info_summary: # Avoid duplicates
-                  gathered_info_summary += f"System Note in model output: {note.strip()}\n"
-        if not gathered_info_summary.strip():
-            gathered_info_summary = "No specific information was gathered using tools."
-        # Add the synthesis prompt to the history for the final generation step
-        # This keeps the history structure correct for apply_chat_template
-        # IMPORTANT: This adds the synthesis prompt as the final USER message.
-        # The model will then generate the final ASSISTANT response.
-        temp_chat_history_for_synthesis = model_chat_history.copy() # Copy the history including tool results
-        synthesis_prompt_formatted = synthesis_prompt_content.format(
-            original_user_input=original_user_input,
-            gathered_info_summary=gathered_info_summary.strip() # Add the summary of results
-        )
-        # Append the synthesis prompt as the final user message content
-        # This maintains the user/assistant alternation (last was assistant, now user for synthesis instruction)
-        temp_chat_history_for_synthesis.append({"role": "user", "content": synthesis_prompt_formatted.strip()})
-        # Generate the final response using the history with the synthesis prompt
-        print("Generating final synthesized response...")
-        try:
-             final_synthesis_response = client.chat_completion(
-                 messages=temp_chat_history_for_synthesis, # Use the history with the synthesis prompt
-                 max_tokens=1024, # Allow reasonable tokens for synthesis
-                 stream=False,
-                 temperature=0.5, # Allow a bit more creativity for synthesis
-                 top_p=0.95,
-             )
-             if final_synthesis_response and final_synthesis_response.choices:
-                 final_response_text = final_synthesis_response.choices[0].message.content.strip()
-                 print(f"Synthesized response generated: {final_response_text[:100]}...")
-             else:
-                 final_response_text = "I was unable to generate a comprehensive answer based on the information gathered."
-                 print("Synthesis generation failed.")
-        except Exception as e:
-             print(f"Error during synthesis generation: {e}")
-             print(traceback.format_exc())
-             final_response_text = f"An error occurred while synthesizing the response: {e}"
-    # Add Sources if any URLs were collected
-    if unique_urls:
-         final_response_text += "\n\nSources:\n" + "\n".join(sorted(list(unique_urls))) # Sort URLs alphabetically
-    # Update the last message in chat_history_state with the final response
-    # Find the last turn in the original chat_history_state (which was already updated with the placeholder)
-    if chat_history_state and len(chat_history_state) > 0:
-        chat_history_state[-1][1] = final_response_text # Update the bot's message in the last turn
-    # Remove the [[TEXT]] marker from the chat_history_state *before* yielding
-    # Iterate through the history and clean each item
-    cleaned_chat_history_list_of_lists = []
-    for user_msg, bot_msg in chat_history_state:
-        cleaned_user_msg = user_msg.replace('[[TEXT]] ', '') if isinstance(user_msg, str) else user_msg
-        cleaned_bot_msg = bot_msg.replace('[[TEXT]] ', '') if isinstance(bot_msg, str) else bot_msg
-        cleaned_chat_history_list_of_lists.append([cleaned_user_msg, cleaned_bot_msg])
-    # Convert the cleaned list of lists to a list of tuples as required by Gradio
-    cleaned_chat_history_list_of_tuples = [(user_msg, bot_msg) for user_msg, bot_msg in cleaned_chat_history_list_of_lists]
-    print(f"\n--- Final Response: {final_response_text[:100]}... ---") # Debug Print
-    # Yield the cleaned history as a list of tuples
-    yield cleaned_chat_history_list_of_tuples # Yield the cleaned history as tuples
-# ──────────────────────────
-# 3  Gradio interface
-# ──────────────────────────
-# The Gradio interface definition remains the same as it correctly
-# uses the updated respond function.
 print(f"RAG functionality available: {business_info_available}")
 demo = gr.ChatInterface(
-    fn=chat_with_bot, # Use the new chat_with_bot function
     additional_inputs=[
-        gr.Textbox(value="You are FutureAi, a helpful, polite, and professional assistant for Futuresony. Your primary goal is to assist the user by effectively using the available tools or answering directly based on the conversation history and tool outputs. Maintain a positive and helpful tone. If you are unsure or a tool returns no clear results, state this gracefully. When providing answers based on gathered information, aim for a comprehensive and detailed response, synthesizing all relevant points from the tool outputs.", label="System message (Note: This is less critical now as the system prompt is built internally for tool use.)", visible=False), # Hide this as system prompt is internal
-        gr.Slider(1, 4096, value=1024, step=1, label="Max new tokens (Note: Affects tool output processing and final answer length)"), # Increased max tokens
-        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature (Note: Affects model's creativity, keep lower for reliable tool use)"),
         gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p (nucleus sampling)"),
     ],
-    chatbot=gr.Chatbot(height=500), # Added height for better display
-    title="Gemma‑2‑9B‑IT Chat with RAG and Tool Use", # Updated title
-    description="Chat with Google Gemma‑2‑9B‑IT via Hugging Face Inference API, with business info retrieved from Google Sheets and external search capabilities.", # Updated description
-    submit_btn="Send", # Renamed submit button
-    # Removed clear_btn as it caused a TypeError in the user's environment
 )
-# Enable request queueing (concurrency handled automatically on Gradio ≥ 4)
 demo.queue()
 if __name__ == "__main__":
     # Authenticate and load data before launching the demo
     if authenticate_google_sheets():
         load_business_info()
     else:
         print("Google Sheets authentication failed. RAG functionality will not be available.")
-    # The print statement for RAG status is added here, before launching the demo.
     print(f"RAG functionality available: {business_info_available}")
-    demo.launch(debug=True) # Added

+# This script combines all components for deployment on Hugging Face Spaces.
+# --- Imports ---
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer, util, CrossEncoder
 import gspread
+# from google.colab import auth # Colab specific, remove for HF Spaces
+# from google.auth import default # Colab specific, remove for HF Spaces
 from tqdm import tqdm
 from duckduckgo_search import DDGS
 import spacy
 from datetime import date, timedelta
+from dateutil.relativedelta import relativedelta
+import traceback
+import base64
 # Suppress warnings
 warnings.filterwarnings("ignore", category=UserWarning)
+# --- Global Variables and Secrets ---
+# HF_TOKEN is read from the environment; on HF Spaces, add it as a Space Secret so it is set at runtime
 HF_TOKEN = os.getenv("HF_TOKEN")
+# SHEET_ID and GOOGLE_BASE64_CREDENTIALS should both be added as Space Secrets
+SHEET_ID = os.getenv("SHEET_ID") # Get SHEET_ID from Space Secrets
 GOOGLE_BASE64_CREDENTIALS = os.getenv("GOOGLE_BASE64_CREDENTIALS")
+# --- Model and Tool Initialization ---
+client = None # Initialize after HF_TOKEN is confirmed available
 nlp = None
+embedder = None
+reranker = None
 try:
+    # Initialize InferenceClient
+    if HF_TOKEN:
+        client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
+        print("Hugging Face Inference Client initialized.")
+    else:
+        print("Warning: HF_TOKEN not found. Inference Client not initialized.")
+    # Load spacy model for sentence splitting
     try:
         nlp = spacy.load("en_core_web_sm")
+        print("SpaCy model 'en_core_web_sm' loaded.")
+    except OSError:
+        print("SpaCy model 'en_core_web_sm' not found. Downloading...")
+        try:
+            # Use pip for installation in HF Spaces environment
+            os.system("pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz")
+            nlp = spacy.load("en_core_web_sm")
+            print("SpaCy model 'en_core_web_sm' downloaded and loaded.")
+        except Exception as e:
+            print(f"Failed to download or load SpaCy model: {e}")
+    # Load SentenceTransformer for RAG/business info retrieval
     print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
     embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
     print("Sentence Transformer loaded.")
+    # Load a Cross-Encoder model for re-ranking retrieved documents
     print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
     reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
     print("Cross-Encoder Reranker loaded.")
 except Exception as e:
+    print(f"An error occurred during model/tool initialization: {e}")
     print(traceback.format_exc())
+# --- Google Sheets Authentication ---
 gc = None # Global variable for gspread client
 def authenticate_google_sheets():
     """Authenticates with Google Sheets using base64 encoded credentials."""
     print("Authenticating Google Account...")
     if not GOOGLE_BASE64_CREDENTIALS:
         print("Error: GOOGLE_BASE64_CREDENTIALS secret not found.")
+        print("Please add GOOGLE_BASE64_CREDENTIALS as a Space Secret.")
         return False
     try:
         print(traceback.format_exc())
         return False
+# --- Google Sheets Data Loading and Embedding ---
+data = [] # Global variable to store loaded data
 descriptions_for_embedding = []
 embeddings = torch.tensor([])
 business_info_available = False # Flag to indicate if business info was loaded successfully
 def load_business_info():
     """Loads business information from Google Sheet and creates embeddings."""
+    global data, descriptions_for_embedding, embeddings, business_info_available
     business_info_available = False # Reset flag
     if gc is None:
     if not SHEET_ID:
          print("Error: SHEET_ID not set.")
+         print("Please add SHEET_ID as a Space Secret.")
          return
     try:
         if not data_records:
             print(f"Warning: No data records found in Google Sheet with ID: {SHEET_ID}")
+            data = []
             descriptions_for_embedding = []
         else:
             # Filter out rows missing 'Service' or 'Description'
             filtered_data = [row for row in data_records if row.get('Service') and row.get('Description')]
             if not filtered_data:
                 print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
+                data = []
                 descriptions_for_embedding = []
             else:
+                data = filtered_data
                 descriptions_for_embedding = [f"Service: {row['Service']}. Description: {row['Description']}" for row in data]
                 if descriptions_for_embedding and embedder is not None:
                     print("Encoding descriptions...")
                     try:
                         business_info_available = True # Set flag if successful
                     except Exception as e:
                         print(f"Error during description encoding: {e}")
+                        embeddings = torch.tensor([])
                         business_info_available = False # Encoding failed
                 else:
                     print("Skipping encoding descriptions: No descriptions found or embedder not available.")
+                    embeddings = torch.tensor([])
+                    business_info_available = False
         print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
         if not business_info_available:
     except gspread.exceptions.SpreadsheetNotFound:
         print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
         print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
+        business_info_available = False
     except Exception as e:
         print(f"An error occurred while accessing the Google Sheet: {e}")
         print(traceback.format_exc())
+        business_info_available = False
+# --- Business Info Retrieval (RAG) ---
def retrieve_business_info(query: str, top_n: int = 3) -> list:
    """
    Retrieves relevant business information from loaded data based on a query.

    The query is embedded and scored by cosine similarity against the
    precomputed description embeddings. The best-scoring rows are then
    re-ordered with the cross-encoder reranker when one is loaded.

    Args:
        query: User text to match against the loaded sheet rows.
        top_n: Maximum number of rows to return.

    Returns:
        Up to ``top_n`` rows taken from ``data``, most relevant first. An
        empty list is returned when retrieval is unavailable, when ``top_n``
        is not positive, or when any error occurs during retrieval.
    """
    if not business_info_available or embedder is None or not descriptions_for_embedding or not data:
        print("Business information retrieval is not available or data is empty.")
        return []
    if top_n <= 0:
        # Nothing was asked for; skip the encode and the topk error path.
        return []
    try:
        query_embedding = embedder.encode(query, convert_to_tensor=True)
        cosine_scores = util.cos_sim(query_embedding, embeddings)[0]
        # Bound k by what was actually scored as well as by the row count so
        # topk never asks for more elements than exist.
        candidate_count = min(top_n, len(data), cosine_scores.shape[0])
        top_results_indices = torch.topk(cosine_scores, k=candidate_count)[1].tolist()
        top_results = [data[i] for i in top_results_indices]
        if reranker is None or not top_results:
            return top_results

        print("Re-ranking top results...")
        rerank_pairs = [(query, descriptions_for_embedding[i]) for i in top_results_indices]
        rerank_scores = reranker.predict(rerank_pairs)
        # sorted() is stable, so rows with equal scores keep their
        # similarity-based order.
        ranked = sorted(
            zip(rerank_scores, top_results),
            key=lambda scored_row: scored_row[0],
            reverse=True,
        )
        print("Re-ranking complete.")
        return [row for _, row in ranked]
    except Exception as e:
        print(f"Error during business information retrieval: {e}")
        print(traceback.format_exc())
        return []
+# --- Tool Functions ---
 # Function to perform DuckDuckGo Search and return results with URLs
 def perform_duckduckgo_search(query: str, max_results: int = 5):
     """
     print(f"Executing Tool: perform_duckduckgo_search with query='{query}')")
     search_results_list = []
     try:
         time.sleep(1) # Sleep for 1 second
         with DDGS() as ddgs:
             if not query or len(query.split()) < 2:
                  print(f"Skipping search for short query: '{query}'")
                  return []
             results_generator = ddgs.text(query, max_results=max_results)
             results_found = False
             for r in results_generator:
                 search_results_list.append(r)
                 results_found = True
             if not results_found and max_results > 0:
                  print(f"DuckDuckGo search for '{query}' returned no results.")
     except Exception as e:
         print(f"Error during Duckduckgo search for '{query}': {e}")
         return []
     return search_results_list
 # Function to perform date calculation if needed
     """
     Analyzes query for date calculation requests and performs the calculation.
     Returns a dict describing the calculation and result, or None.
     """
     print(f"Executing Tool: perform_date_calculation with query='{query}')")
     query_lower = query.lower()
     return {"query": query, "description": desc, "result": None, "success": False}
+# --- Tool Definitions and System Prompt ---
 TOOL_DEFINITIONS = """
 Available tools:
 1.  **search**: Use this tool to perform a web search for current external information. Useful for facts, news, weather, etc.
 """
 tool_use_system_template = """<system>
 You are FutureAi, a helpful, polite, and professional assistant for Futuresony. Your primary goal is to assist the user by effectively using the available tools or answering directly based on the conversation history and tool outputs. Maintain a positive and helpful tone. If you are unsure or a tool returns no clear results, state this gracefully. When providing answers based on gathered information, aim for a comprehensive and detailed response, synthesizing all relevant points from the tool outputs.
 MAX_HISTORY_TURNS = 5 # Keep last 5 turns
+# --- Chat Handler ---
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """
    Stream a chat completion for ``message``, grounded in retrieved business info.

    Args:
        message: The current user message.
        history: Earlier ``(user_message, assistant_message)`` pairs.
        system_message: System prompt placed first in the conversation.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text after each streamed chunk, or a single
        error string when the client is missing or the request fails.
    """
    if client is None:
        yield "Error: Hugging Face Inference Client not initialized. Please check your HF_TOKEN Space Secret."
        return

    # Retrieve relevant business information based on the user's message
    retrieved_info = retrieve_business_info(message)

    system_content = system_message
    if retrieved_info:
        entries = []
        for entry_number, info in enumerate(retrieved_info, start=1):
            fields = "".join(f"{key}: {value!s}\n" for key, value in info.items())
            entries.append(f"--- Business Info Entry {entry_number} ---\n{fields}---\n")
        context_message = (
            "Use the following business information to help answer the user's question if relevant:\n"
            + "".join(entries)
        )
        # Keep the context in the system turn: sending it as its own "user"
        # message puts two user turns back to back, which chat templates that
        # require strictly alternating roles reject.
        system_content = f"{system_message}\n\n{context_message}"
        print("Added retrieved business info to messages.")

    # Build the conversation: system prompt, prior turns, then the new message
    messages = [{"role": "system", "content": system_content}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Stream tokens
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if not chunk.choices:
                # Some stream chunks carry no choices; indexing [0] on them
                # would abort the whole response.
                continue
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response
    except Exception as e:
        print(f"Error during chat completion: {e}")
        print(traceback.format_exc())
        yield f"An error occurred: {e}"
+# --- Gradio interface ---
 print(f"RAG functionality available: {business_info_available}")
 demo = gr.ChatInterface(
+    fn=respond,
     additional_inputs=[
+        gr.Textbox(value=tool_use_system_template.format(current_date=date.today().strftime('%Y-%m-%d'), tool_definitions=TOOL_DEFINITIONS), label="System message", interactive=False), # Use the template here
+        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p (nucleus sampling)"),
     ],
+    title="Gemma‑2‑9B‑IT Chat with RAG and Tools",
+    description="Chat with Google Gemma‑2‑9B‑IT via Hugging Face Inference API, with business info retrieved from Google Sheets and external search/date tools.",
 )
+# Enable request queueing (concurrency handled automatically on Gradio ≥ 4)
 demo.queue()
 if __name__ == "__main__":
     # Authenticate and load data before launching the demo
+    # These steps need to happen when the script is run
     if authenticate_google_sheets():
         load_business_info()
     else:
         print("Google Sheets authentication failed. RAG functionality will not be available.")
     print(f"RAG functionality available: {business_info_available}")
+    # Launch the Gradio interface
+    # debug=True is useful for development, remove for production
+    demo.launch()