Futuresony committed on
Commit
9cd0bee
·
verified ·
1 Parent(s): 1e6371a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -728
app.py CHANGED
@@ -1,7 +1,6 @@
1
- # This block contains the full combined script for testing.
2
- # It includes all the code from the previous successful steps.
3
 
4
- # Combined Imports
5
  import os
6
  import gradio as gr
7
  from huggingface_hub import InferenceClient
@@ -13,80 +12,70 @@ import json
13
  from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
14
  from sentence_transformers import SentenceTransformer, util, CrossEncoder
15
  import gspread
16
- # from google.colab import auth
17
- from google.auth import default
18
  from tqdm import tqdm
19
  from duckduckgo_search import DDGS
20
  import spacy
21
  from datetime import date, timedelta
22
- from dateutil.relativedelta import relativedelta # Corrected typo
23
- import traceback # Import traceback
24
- import base64 # Import base64
25
-
26
-
27
- # Add PyTorch version and CUDA availability check
28
- print(f"PyTorch version: {torch.__version__}")
29
- print(f"Is CUDA available: {torch.cuda.is_available()}")
30
- if torch.cuda.is_available():
31
- print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
32
-
33
- # Optional: Add check for torchvision version if installed
34
- try:
35
- import torchvision
36
- print(f"Torchvision version: {torchvision.__version__}")
37
- except ImportError:
38
- print("Torchvision not installed.")
39
 
40
  # Suppress warnings
41
  warnings.filterwarnings("ignore", category=UserWarning)
42
 
43
- # Define global variables and load secrets
 
44
  HF_TOKEN = os.getenv("HF_TOKEN")
45
- SHEET_ID = "19ipxC2vHYhpXCefpxpIkpeYdI43a1Ku2kYwecgUULIw"
 
46
  GOOGLE_BASE64_CREDENTIALS = os.getenv("GOOGLE_BASE64_CREDENTIALS")
47
 
48
- # Initialize InferenceClient
49
- client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
50
-
51
- # Load spacy model for sentence splitting
52
  nlp = None
 
 
 
53
  try:
54
- nlp = spacy.load("en_core_web_sm")
55
- print("SpaCy model 'en_core_web_sm' loaded.")
56
- except OSError:
57
- print("SpaCy model 'en_core_web_sm' not found. Downloading...")
 
 
 
 
58
  try:
59
- os.system("python -m spacy download en_core_web_sm")
60
  nlp = spacy.load("en_core_web_sm")
61
- print("SpaCy model 'en_core_web_sm' downloaded and loaded.")
62
- except Exception as e:
63
- print(f"Failed to download or load SpaCy model: {e}")
64
-
 
 
 
 
 
 
65
 
66
- # Load SentenceTransformer for RAG/business info retrieval
67
- embedder = None
68
- try:
69
  print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
70
  embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
71
  print("Sentence Transformer loaded.")
72
- except Exception as e:
73
- print(f"Error loading Sentence Transformer: {e}")
74
-
75
 
76
- # Load a Cross-Encoder model for re-ranking retrieved documents
77
- reranker = None
78
- try:
79
  print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
80
  reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
81
  print("Cross-Encoder Reranker loaded.")
 
82
  except Exception as e:
83
- print(f"Error loading Cross-Encoder Reranker: {e}")
84
- print("Please ensure the model identifier 'cross-encoder/ms-marco-MiniLM-L6-v2' is correct and accessible on Hugging Face Hub.")
85
  print(traceback.format_exc())
86
- reranker = None
87
 
88
 
89
- # Google Sheets Authentication
90
  gc = None # Global variable for gspread client
91
  def authenticate_google_sheets():
92
  """Authenticates with Google Sheets using base64 encoded credentials."""
@@ -94,6 +83,7 @@ def authenticate_google_sheets():
94
  print("Authenticating Google Account...")
95
  if not GOOGLE_BASE64_CREDENTIALS:
96
  print("Error: GOOGLE_BASE64_CREDENTIALS secret not found.")
 
97
  return False
98
 
99
  try:
@@ -111,16 +101,15 @@ def authenticate_google_sheets():
111
  print(traceback.format_exc())
112
  return False
113
 
114
- # Google Sheets Data Loading and Embedding
115
- # business_data = [] # Global variable to store loaded data - This was intended to be global, but needs to be named 'data' to match usage
116
- data = [] # Global variable to store loaded data - Renamed to 'data'
117
  descriptions_for_embedding = []
118
  embeddings = torch.tensor([])
119
  business_info_available = False # Flag to indicate if business info was loaded successfully
120
 
121
  def load_business_info():
122
  """Loads business information from Google Sheet and creates embeddings."""
123
- global data, descriptions_for_embedding, embeddings, business_info_available # Added 'data' to global
124
  business_info_available = False # Reset flag
125
 
126
  if gc is None:
@@ -129,6 +118,7 @@ def load_business_info():
129
 
130
  if not SHEET_ID:
131
  print("Error: SHEET_ID not set.")
 
132
  return
133
 
134
  try:
@@ -138,21 +128,19 @@ def load_business_info():
138
 
139
  if not data_records:
140
  print(f"Warning: No data records found in Google Sheet with ID: {SHEET_ID}")
141
- data = [] # Use the global 'data'
142
  descriptions_for_embedding = []
143
  else:
144
  # Filter out rows missing 'Service' or 'Description'
145
  filtered_data = [row for row in data_records if row.get('Service') and row.get('Description')]
146
  if not filtered_data:
147
  print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
148
- data = [] # Use the global 'data'
149
  descriptions_for_embedding = []
150
  else:
151
- data = filtered_data # Assign to the global 'data'
152
- # Use BOTH Service and Description for embedding
153
  descriptions_for_embedding = [f"Service: {row['Service']}. Description: {row['Description']}" for row in data]
154
 
155
- # Only encode if descriptions_for_embedding are found and embedder is available
156
  if descriptions_for_embedding and embedder is not None:
157
  print("Encoding descriptions...")
158
  try:
@@ -161,12 +149,12 @@ def load_business_info():
161
  business_info_available = True # Set flag if successful
162
  except Exception as e:
163
  print(f"Error during description encoding: {e}")
164
- embeddings = torch.tensor([]) # Ensure embeddings is an empty tensor on error
165
  business_info_available = False # Encoding failed
166
  else:
167
  print("Skipping encoding descriptions: No descriptions found or embedder not available.")
168
- embeddings = torch.tensor([]) # Ensure embeddings is an empty tensor
169
- business_info_available = False # Cannot use RAG without descriptions or embedder
170
 
171
  print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
172
  if not business_info_available:
@@ -175,88 +163,45 @@ def load_business_info():
175
  except gspread.exceptions.SpreadsheetNotFound:
176
  print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
177
  print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
178
- business_info_available = False # Sheet not found
179
  except Exception as e:
180
  print(f"An error occurred while accessing the Google Sheet: {e}")
181
  print(traceback.format_exc())
182
- business_info_available = False # Other sheet access error
183
 
184
- # Business Info Retrieval (RAG)
185
- def retrieve_business_info(query: str, threshold: float = 0.50, max_matches: int = 5) -> tuple[list, float]:
186
  """
187
  Retrieves relevant business information from loaded data based on a query.
188
-
189
- Args:
190
- query: The user's query string.
191
- threshold: Minimum relevance score for a match.
192
- max_matches: The maximum number of top relevant entries to retrieve *before* thresholding.
193
-
194
- Returns:
195
- A tuple containing:
196
- - A list of dictionaries, where each dictionary is a relevant row from the
197
- Google Sheet data that meets the threshold.
198
- - The score of the best match found (even if below threshold).
199
- Returns an empty list and score 0.0 if RAG is not available or
200
- no relevant information is found.
201
  """
202
  global data
203
  if not business_info_available or embedder is None or not descriptions_for_embedding or not data:
204
  print("Business information retrieval is not available or data is empty.")
205
- return [], 0.0
206
 
207
  try:
208
  query_embedding = embedder.encode(query, convert_to_tensor=True)
209
  cosine_scores = util.cos_sim(query_embedding, embeddings)[0]
 
 
210
 
211
- # Get top N results *before* thresholding to allow re-ranking on potentially relevant items
212
- top_n_indices_pre_rerank = torch.topk(cosine_scores, k=min(max_matches * 2, len(data)))[1].tolist() # Get more for better re-ranking pool
213
- top_n_pre_rerank_results = [data[i] for i in top_n_indices_pre_rerank]
214
- descriptions_pre_rerank = [descriptions_for_embedding[i] for i in top_n_indices_pre_rerank]
215
-
216
- best_score_overall = torch.max(cosine_scores).item() if len(cosine_scores) > 0 else 0.0
217
-
218
-
219
- # Optional: Re-rank the top results using the Cross-Encoder
220
- reranked_results_with_scores = []
221
- if reranker is not None and top_n_pre_rerank_results:
222
- print("Re-ranking top results before thresholding...")
223
- rerank_pairs = [(query, desc) for desc in descriptions_pre_rerank]
224
  rerank_scores = reranker.predict(rerank_pairs)
225
-
226
- # Pair original results with rerank scores and sort
227
- paired_results_with_scores = list(zip(top_n_pre_rerank_results, rerank_scores))
228
- reranked_paired_results = sorted(paired_results_with_scores, key=lambda item: item[1], reverse=True)
229
-
230
  print("Re-ranking complete.")
231
- # Apply threshold and max_matches *after* re-ranking
232
- filtered_results = []
233
- for item, score in reranked_paired_results:
234
- if score >= threshold:
235
- filtered_results.append(item)
236
- if len(filtered_results) >= max_matches: # Apply max_matches here
237
- break
238
- return filtered_results, best_score_overall
239
-
240
  else:
241
- # If reranker is not available, apply threshold and max_matches directly to cosine scores
242
- print("Reranker not available or no results to rerank. Applying threshold and max_matches to cosine scores.")
243
- filtered_results = []
244
- sorted_cosine_scores, sorted_indices = torch.sort(cosine_scores, descending=True)
245
- for i in range(min(max_matches, len(data))): # Take top N based on cosine
246
- if sorted_cosine_scores[i].item() >= threshold: # Check threshold
247
- filtered_results.append(data[sorted_indices[i].item()])
248
- else:
249
- break # Stop if score drops below threshold
250
-
251
- return filtered_results, best_score_overall
252
-
253
 
254
  except Exception as e:
255
  print(f"Error during business information retrieval: {e}")
256
  print(traceback.format_exc())
257
- return [], 0.0
258
-
259
 
 
260
  # Function to perform DuckDuckGo Search and return results with URLs
261
  def perform_duckduckgo_search(query: str, max_results: int = 5):
262
  """
@@ -267,29 +212,21 @@ def perform_duckduckgo_search(query: str, max_results: int = 5):
267
  print(f"Executing Tool: perform_duckduckgo_search with query='{query}')")
268
  search_results_list = []
269
  try:
270
- # Add a delay before each search
271
  time.sleep(1) # Sleep for 1 second
272
-
273
  with DDGS() as ddgs:
274
  if not query or len(query.split()) < 2:
275
  print(f"Skipping search for short query: '{query}'")
276
  return []
277
-
278
- # Use text() method for general text search
279
  results_generator = ddgs.text(query, max_results=max_results)
280
  results_found = False
281
  for r in results_generator:
282
  search_results_list.append(r)
283
  results_found = True
284
-
285
  if not results_found and max_results > 0:
286
  print(f"DuckDuckGo search for '{query}' returned no results.")
287
-
288
-
289
  except Exception as e:
290
  print(f"Error during Duckduckgo search for '{query}': {e}")
291
  return []
292
-
293
  return search_results_list
294
 
295
  # Function to perform date calculation if needed
@@ -297,8 +234,6 @@ def perform_date_calculation(query: str):
297
  """
298
  Analyzes query for date calculation requests and performs the calculation.
299
  Returns a dict describing the calculation and result, or None.
300
- Handles formats like 'X days ago', 'X days from now', 'X weeks ago', 'X weeks from now', 'what is today's date'.
301
- Uses dateutil for slightly more flexibility (though core logic remains simple).
302
  """
303
  print(f"Executing Tool: perform_date_calculation with query='{query}')")
304
  query_lower = query.lower()
@@ -357,9 +292,7 @@ def perform_date_calculation(query: str):
357
  return {"query": query, "description": desc, "result": None, "success": False}
358
 
359
 
360
- # --- Tool Definitions for the Model ---
361
- # Describe the tools available to the model in a structured format
362
- # This will be injected into the prompt.
363
  TOOL_DEFINITIONS = """
364
  Available tools:
365
  1. **search**: Use this tool to perform a web search for current external information. Useful for facts, news, weather, etc.
@@ -383,10 +316,6 @@ Available tools:
383
 
384
  """
385
 
386
- # --- System Prompt Template for Tool Use ---
387
- # This template instructs the model on how to use the tools and format its output.
388
- # Inject this *within* the user message content.
389
- # MODIFIED to ask for COMPREHENSIVE answers
390
  tool_use_system_template = """<system>
391
  You are FutureAi, a helpful, polite, and professional assistant for Futuresony. Your primary goal is to assist the user by effectively using the available tools or answering directly based on the conversation history and tool outputs. Maintain a positive and helpful tone. If you are unsure or a tool returns no clear results, state this gracefully. When providing answers based on gathered information, aim for a comprehensive and detailed response, synthesizing all relevant points from the tool outputs.
392
 
@@ -427,616 +356,95 @@ Output ONLY tool calls within <tool_code> tags or a final answer using the 'answ
427
  MAX_HISTORY_TURNS = 5 # Keep last 5 turns
428
 
429
 
430
- # --- Chat Logic Function with Tool Use ---
431
- # Modified to be a generator function
432
- def chat_with_bot(user_input, chat_history_state, system_message, max_new_tokens, temperature, top_p): # Added parameters from Gradio interface
433
- """
434
- Processes user input through an iterative tool-use logic for Gradio interface.
435
- Takes user_input string and chat_history_state (list of lists) as input.
436
- Yields the updated chat_history_state (list of lists) at each step.
437
- Uses a structured tool-calling approach.
438
- Guaranteed strict user/assistant role alternation in model_chat_history.
439
- """
440
- # Basic Input Safety Check (Example)
441
- if any(phrase in user_input.lower() for phrase in ["harmful content", "malicious intent"]):
442
- safe_response = "I cannot process requests that involve harmful or inappropriate content."
443
- yield chat_history_state + [[user_input, safe_response]] # Yield immediately for display
444
- return # Exit the generator
445
-
446
- # Append user message to history immediately for display
447
- # The bot message will be updated iteratively
448
- # We append a placeholder now, and update it with the final response later.
449
- # Ensure the initial yield is a list of lists for Gradio, even if it's just one turn
450
- initial_history = chat_history_state + [[user_input, "..."]]
451
- yield initial_history # Yield state with placeholder
452
-
453
- original_user_input = user_input
454
- print(f"\n--- Starting turn with input: {user_input} ---") # Debug Print
455
-
456
- # Get current date
457
- current_date = date.today().strftime('%Y-%m-%d')
458
- print(f"Current Date: {current_date}") # Debug Print
459
-
460
- # Initialize unique_urls here to avoid UnboundLocalError
461
- unique_urls = set() # Collect URLs for Sources section
462
-
463
- # Maintain an internal model history that strictly alternates user/assistant roles
464
- # This history will be used directly by apply_chat_template.
465
- # It represents the conversation *as the model sees it*, including tool calls/results.
466
- # Build this history from the *completed* past turns from chat_history_state.
467
- model_chat_history = []
468
-
469
- # Convert Gradio chat history (list of lists) to model history (list of dicts)
470
- # Ensure strict alternation: user, assistant, user, assistant...
471
- # Only add complete turns from the *past* history (exclude the current incomplete turn)
472
- # Limit the history length
473
- history_to_process = chat_history_state # Use the full history passed to the function initially
474
-
475
- # Ensure we only take pairs [user, bot] from past history where bot is NOT the initial placeholder
476
- # This guarantees that the last message in `recent_complete_turns` corresponds to a *completed* assistant response.
477
- complete_past_turns = [
478
- turn for turn in history_to_process
479
- if turn is not None and len(turn) == 2 and turn[0] is not None and turn[1] is not None and str(turn[1]).strip() != "..."
480
- ]
481
-
482
- # Take the last MAX_HISTORY_TURNS complete turns
483
- recent_complete_turns = complete_past_turns[max(0, len(complete_past_turns) - MAX_HISTORY_TURNS):]
484
-
485
- for user_msg, bot_msg in recent_complete_turns:
486
- # Add user message (must be present)
487
- if user_msg is not None: # Should always be True based on complete_past_turns filter
488
- model_chat_history.append({"role": "user", "content": str(user_msg).strip()})
489
- # Add assistant message (must be present and non-placeholder based on complete_past_turns filter)
490
- if bot_msg is not None and str(bot_msg).strip() != "...": # Should always be True based on filter
491
- model_chat_history.append({"role": "assistant", "content": str(bot_msg).strip()})
492
-
493
- # Initialize variables for the tool-use loop
494
- max_tool_iterations = 5 # Limit the number of tool calls in a single turn to prevent infinite loops
495
- final_response_text = None # Variable to hold the final answer from the 'answer' tool
496
- current_tool_results_text = "" # Accumulate tool results text for the *next* model call in this turn
497
-
498
- print("Starting tool execution loop...")
499
-
500
- try: # This is the main try block for the chat_with_bot function
501
- for i in range(max_tool_iterations):
502
- print(f"\n--- Tool Iteration {i+1} ---")
503
-
504
- # Step 1 & 2: Prepare the user message content for THIS iteration and append to history
505
- # The content of the user message for this iteration depends on whether it's the first step
506
- # (original query + system prompt) or a subsequent step (tool results).
507
- current_user_message_content = ""
508
- if i == 0:
509
- # First iteration: Include the system template and the original user input
510
- system_prompt_content = tool_use_system_template.format(
511
- current_date=current_date,
512
- tool_definitions=TOOL_DEFINITIONS
513
- )
514
- current_user_message_content = system_prompt_content + "\n\nUser Query: " + original_user_input
515
- else:
516
- # Subsequent iterations: Include the tool results from the previous assistant response.
517
- if current_tool_results_text:
518
- current_user_message_content = "<tool_results>\n" + current_tool_results_text.strip() + "\n</tool_results>"
519
- current_tool_results_text = "" # Clear the buffer after adding to the prompt
520
- else:
521
- # If no new tool results were accumulated in the previous step (e.g., parsing failed, no tools called),
522
- # send a message indicating this so the model doesn't wait indefinitely.
523
- current_user_message_content = "<tool_results>No new results or no tools were called in the previous turn.</tool_results>"
524
- print("No new tool results to add for this iteration.")
525
-
526
- # Append the user message for the current iteration to the main model history.
527
- # This history is what apply_chat_template will process.
528
- # If the logic is correct, model_chat_history should always end with an 'assistant' role
529
- # before this append, except for the very first turn of the conversation.
530
- model_chat_history.append({"role": "user", "content": current_user_message_content.strip()})
531
-
532
-
533
- # Step 3 & 4: Apply template to get the full prompt and Generate model output
534
- # The history `model_chat_history` should now be in the correct state for generation:
535
- # starting with 'user' and ending with the current 'user' message.
536
- # The check below verifies the strict alternation before tokenization.
537
- if len(model_chat_history) > 1 and model_chat_history[-1]['role'] == model_chat_history[-2]['role']:
538
- print("Error: History roles are not alternating before generation!")
539
- print("History:", model_chat_history)
540
- final_response_text = "Sorry, I encountered an internal error with the conversation history format before generation."
541
- break # Break the tool loop if history is malformed
542
-
543
-
544
- prompt_for_generation = client.chat_completion(
545
- messages=model_chat_history, # Use the main model_chat_history directly
546
- max_tokens=max_new_tokens, # Use max_new_tokens parameter
547
- stream=False, # Use non-streaming for tool parsing loop
548
- temperature=temperature, # Use temperature parameter
549
- top_p=top_p, # Use top_p parameter
550
- # Add tool_choice="auto" or specific tool names if the API supports it
551
- # For Gemma-2-IT, we rely on prompt engineering for tool calls.
552
- )
553
-
554
- raw_model_output = ""
555
- if prompt_for_generation and prompt_for_generation.choices:
556
- raw_model_output = prompt_for_generation.choices[0].message.content.strip()
557
- else:
558
- print("Model returned an empty response or no choices.")
559
- raw_model_output = "<system_error>Error: Model returned empty response.</system_error>" # Report error via system tag
560
-
561
-
562
- print(f"Raw model output: {raw_model_output}")
563
-
564
- # Step 5: Append the model's raw output as the assistant message for THIS iteration
565
- # This is crucial for maintaining the alternation in `model_chat_history`
566
- model_chat_history.append({"role": "assistant", "content": raw_model_output.strip()})
567
-
568
-
569
- # Step 6: Parse Tool Calls from the latest assistant message (which is now the last entry in history)
570
- tool_calls = []
571
- # Use regex to find all content within <tool_code> tags in the latest assistant message
572
- matches = re.findall(r'<tool_code>(.*?)</tool_code>', model_chat_history[-1]['content'], re.DOTALL)
573
-
574
- if not matches:
575
- print("No tool calls found in latest model output.")
576
- # If no tool calls, check if the model tried to output an answer directly
577
- # This is a fallback if the model fails to use the 'answer' tool.
578
- # Apply cleanup patterns just to the latest assistant message to see if it's a potential answer
579
- cleaned_potential_answer = re.sub(r'<tool_code>.*?</tool_code>', '', model_chat_history[-1]['content'], flags=re.DOTALL) # Remove tool tags first
580
- cleaned_potential_answer = re.sub(r'<.*?>', '', cleaned_potential_answer).strip() # Remove any other potential tags
581
-
582
- # If the cleaned output is not empty or just whitespace, treat it as a potential final answer
583
- if cleaned_potential_answer and final_response_text is None:
584
- print("Model output does not contain tool calls, treating cleaned output as potential direct answer.")
585
- final_response_text = cleaned_potential_answer
586
- break # Exit the tool loop as we have a response
587
-
588
- # If no tool calls and not a potential answer, check for explicit system errors reported by the model
589
- if "<system_error>" in model_chat_history[-1]['content'] or "<error>" in model_chat_history[-1]['content']:
590
- print("Model output contains system error tags. Exiting tool loop.")
591
- # The synthesis step will pick up these errors from the history
592
- break # Exit loop on critical error reported by the model
593
-
594
-
595
- # If no tool calls and not a potential answer, and no explicit error, the loop might continue.
596
- # The next iteration's user message content will be generated as "No new results..."
597
- continue # Skip to the next iteration
598
-
599
-
600
- # Step 7: Execute Tool Calls and accumulate results for the *next* iteration's user message
601
- # We clear the buffer here, as we are processing the *latest* assistant message's tools.
602
- current_tool_results_text = ""
603
- answer_tool_called_in_this_iter = False # Reset flag for this iteration's output
604
-
605
- for match in matches:
606
- try:
607
- # Attempt to parse the content within the tags as JSON
608
- tool_call_json = json.loads(match.strip())
609
- if "tool_name" in tool_call_json and "parameters" in tool_call_json:
610
- tool_name = tool_call_json.get("tool_name")
611
- parameters = tool_call_json.get("parameters", {})
612
-
613
- if tool_name == "answer":
614
- final_response_text = parameters.get("text", "")
615
- answer_tool_called_in_this_iter = True
616
- print(f"Model called 'answer' tool. Final response intended: '{final_response_text}'")
617
- # Once the 'answer' tool is called, we prioritize exiting the loop after this iteration.
618
- # We still process any other tool calls in this *same* model output, but then break the loop afterwards.
619
- continue # Process next tool call in the same output (from the same model output)
620
-
621
-
622
- elif tool_name == "search":
623
- query = parameters.get("query")
624
- max_results = parameters.get("max_results", 5)
625
- if query:
626
- print(f"Executing Tool: search with query='{query}', max_results={max_results}")
627
- results = perform_duckduckgo_search(query, max_results)
628
- current_tool_results_text += f"<search_results_for_query query='{query}'>\n"
629
- if results:
630
- for r in results:
631
- snippet = r.get('body', 'N/A')
632
- if len(snippet) > 300:
633
- snippet = snippet[:300] + "..."
634
- current_tool_results_text += f"<item>\n<title>{r.get('title', 'N/A')}</title>\n<snippet>{snippet}</snippet>\n<url>{r.get('href', 'N/A')}</url>\n</item>\n"
635
- print(f"Executed search for '{query}'. Found {len(results)} results.")
636
- else:
637
- current_tool_results_text += "No results found.\n"
638
- print(f"No search results found for '{query}'.")
639
- current_tool_results_text += "</search_results_for_query>\n"
640
-
641
- else:
642
- current_tool_results_text += f"<search_results_for_query query='{query}'><error>Missing 'query' parameter.</error></search_results_for_query>\n"
643
- print(f"Skipping search tool call: Missing 'query' parameter.")
644
-
645
-
646
- elif tool_name == "lookup_business_info":
647
- query = parameters.get("query")
648
- # Use the threshold and max_matches provided by the model, or the defaults
649
- threshold = parameters.get("threshold", 0.50)
650
- max_matches = parameters.get("max_matches", 5) # Use max_matches parameter
651
- if query:
652
- print(f"Executing Tool: lookup_business_info with query='{query}', threshold={threshold:.4f}, max_matches={max_matches}")
653
- # retrieve_business_info now returns a LIST of matches and the best score
654
- matches_list, best_score = retrieve_business_info(query, threshold=threshold, max_matches=max_matches)
655
- # MODIFIED: Format results block to contain MULTIPLE match tags
656
- current_tool_results_text += f"<lookup_business_info_results_for_query query='{query}' requested_threshold='{threshold:.4f}' requested_max_matches='{max_matches}' final_best_score='{best_score:.4f}'>\n"
657
- if matches_list: # Check if the list is not empty
658
- for match in matches_list: # Iterate through the list of matches
659
- if isinstance(match, dict): # Ensure it's a dictionary
660
- current_tool_results_text += f"<match>\n"
661
- current_tool_results_text += f"<service>{match.get('Service', 'N/A')}</service>\n"
662
- current_tool_results_text += f"<description>{match.get('Description', 'N/A')}</description>\n"
663
- current_tool_results_text += f"<price>{match.get('Price', 'N/A')}</price>\n"
664
- current_tool_results_text += f"<available>{match.get('Available', 'N/A')}</available>\n"
665
- # Add other relevant fields from your sheet here if needed for synthesis
666
- # e.g., <contact_person> etc.
667
- current_tool_results_text += f"</match>\n"
668
- # Optionally add a note if any item in the list was not a dict
669
- else:
670
- print(f"Warning: Item in retrieved_business_info list was not a dict: {match}")
671
-
672
-
673
- print(f"Executed business lookup for '{query}'. Found {len(matches_list)} matches above threshold {threshold:.4f}. Best score: {best_score:.4f}.")
674
- else:
675
- # This case covers No matches found above threshold within retrieve_business_info
676
- current_tool_results_text += f"No relevant matches found above threshold {threshold:.4f} (best score: {best_score:.4f}).\n"
677
- print(f"Executed business lookup for '{query}'. No matches found above threshold.")
678
- # Add a note about the best score being below threshold
679
- if best_score > 0: # Only add note if *some* match was found, but not above threshold
680
- current_tool_results_text += f"<system_note>Best match score ({best_score:.4f}) was below the requested threshold ({threshold:.4f}).</system_note>\n"
681
-
682
- current_tool_results_text += "</lookup_business_info_results_for_query>\n"
683
- else:
684
- current_tool_results_text += f"<lookup_business_info_results_for_query query='{query}'><error>Missing 'query' parameter.</error></lookup_business_info_results_for_query>\n"
685
- print(f"Skipping business lookup tool call: Missing 'query' parameter.")
686
-
687
-
688
- elif tool_name == "perform_date_calculation":
689
- query = parameters.get("query")
690
- if query:
691
- print(f"Executing Tool: perform_date_calculation with query='{query}'")
692
- result = perform_date_calculation(query) # This function already returns a dict or error
693
- current_tool_results_text += f"<perform_date_calculation_results_for_query query='{query}'>\n"
694
- if result and result.get('success'): # Check the 'success' key
695
- current_tool_results_text += f"<description>{result.get('description', 'Calculation Successful')}</description>\n<date>{result.get('result')}</date>\n"
696
- print(f"Executed date calculation for '{query}'. Result: {result.get('result')}.")
697
- elif result and result.get('description'):
698
- current_tool_results_text += f"<description>{result.get('description')}</description>\n" # Report description if result is None or not success
699
- print(f"Executed date calculation for '{query}'. Failed: {result.get('description')}.")
700
- elif isinstance(result, str) and result.startswith("Error"):
701
- current_tool_results_text += f"<error>{result}</error>\n" # Report error string
702
- print(f"Executed date calculation for '{query}'. Error: {result}.")
703
- else: # Generic failure case
704
- current_tool_results_text += "Calculation failed or no specific date recognized.\n"
705
- print(f"Executed date calculation for '{query}'. No specific result.")
706
- current_tool_results_text += "</perform_date_calculation_results_for_query>\n"
707
- else:
708
- current_tool_results_text += f"<perform_date_calculation_results_for_query query='{query}'><error>Missing 'query' parameter.</error></lookup_business_info_results_for_query>\n"
709
- print(f"Skipping date calculation tool call: Missing 'query' parameter.")
710
-
711
-
712
- else:
713
- print(f"Unknown tool requested by model: {tool_name}")
714
- # Add a note to results buffer about the unknown tool
715
- current_tool_results_text += f"<system_note>Unknown tool requested: {tool_name}</system_note>\n"
716
-
717
- else:
718
- print(f"Parsed JSON missing 'tool_name' or 'parameters': {tool_call_json}")
719
- current_tool_results_text += f"<system_note>Failed to parse tool call: Missing 'tool_name' or 'parameters' in JSON: {match.strip()}</system_note>\n"
720
- except json.JSONDecodeError as e:
721
- print(f"Failed to parse tool call JSON: {e}")
722
- print(f"Content was: {match.strip()}")
723
- current_tool_results_text += f"<system_note>Failed to parse tool call JSON: {e}. Content: {match.strip()}</system_note>\n"
724
- except Exception as e:
725
- print(f"An unexpected error occurred during tool execution for call '{tool_call_json}': {e}") # Changed tool_call_json_str to tool_call_json
726
- print(traceback.format_exc()) # Print traceback for tool execution errors
727
- current_tool_results_text += f"<system_note>An unexpected error occurred during tool call processing: {e}. Content: {match.strip()}</system_note>\n"
728
-
729
-
730
- # Step 8: Check if the 'answer' tool was called in this iteration
731
- if answer_tool_called_in_this_iter:
732
- print("Answer tool called. Exiting tool loop.")
733
- break # Exit the main tool iteration loop
734
-
735
- # Step 9: If max iterations reached and 'answer' tool wasn't called
736
- if i == max_tool_iterations - 1 and final_response_text is None:
737
- print(f"Max tool iterations reached ({max_tool_iterations}) without 'answer' call.")
738
- # Add a final note to the results buffer so the model sees it in the last forced synthesis step
739
- current_tool_results_text += "<system_note>Maximum tool calls reached. Please provide a final answer based on the information gathered so far or state that the request cannot be fully fulfilled.</system_note>\n"
740
- # Fall through to the final response generation step below
741
-
742
-
743
- # --- End of the main try block for chat_with_bot ---
744
- # THIS EXCEPT BLOCK NEEDS TO BE AT THE SAME INDENTATION LEVEL AS THE 'try' ABOVE
745
- except Exception as e: # This except matches the 'try' block at the beginning of the function
746
- print(f"An unexpected error occurred in the chat_with_bot function: {e}")
747
- print(traceback.format_exc()) # Print full traceback for debugging
748
- final_response_text = f"Sorry, I encountered an unexpected error while processing your request: {e}"
749
- # In case of error, ensure final_response_text is set so we proceed to update history
750
-
751
-
752
- # The code below runs AFTER the tool iteration loop and its enclosing try/except finishes
753
-
754
- # --- Final Response Generation (Synthesis) ---
755
- # This step is either using the text from the 'answer' tool call,
756
- # or generating a fallback response if the model failed to call 'answer'.
757
-
758
- print("\n--- Final Response Generation ---")
759
-
760
- # If the model successfully called the 'answer' tool, use that text.
761
- # Otherwise, construct a synthesis prompt for the model to generate a final answer.
762
- if final_response_text is None:
763
- print("Model did not call 'answer' tool. Falling back to synthesis prompt.")
764
- # Model failed to call the 'answer' tool within iterations or encountered an error.
765
- # Fallback: Generate a response based on the accumulated history and tool results.
766
-
767
- # The history `model_chat_history` now contains the full trace of tool calls
768
- # and the user messages containing the tool results.
769
-
770
- # Construct the synthesis prompt content.
771
- # MODIFIED Synthesis Prompt to emphasize comprehensive answer
772
- synthesis_prompt_content = """<system>
773
- Please provide a final, comprehensive answer to the user's original query based on ALL the information gathered from the executed tools and the conversation history. Synthesize the information into a coherent, natural language response. Pay special attention to providing detailed descriptions and listing all relevant points found from the business lookup tool when multiple items were retrieved.
774
-
775
- User's original query: "{original_user_input}"
776
-
777
- Information gathered from tools and process notes:
778
- {gathered_info_summary}
779
-
780
- Synthesize ALL relevant information into a clear, concise, and **comprehensive** natural language response for the user. When presenting information from multiple business lookup results, structure your answer to clearly describe each item found (e.g., list them, describe each one fully).
781
-
782
- **Guidelines for your response:**
783
- - Address the user's original question directly.
784
- - Use the information provided in the 'Information gathered' section, synthesizing details from all relevant results.
785
- - If the business lookup returned multiple matches, present the information for *each* match found clearly and informatively.
786
- - If a tool was executed but returned no relevant results (especially if the best score was below the threshold), or if there were errors (<system_error>, <error>, <system_note> tags), explain this gracefully to the user.
787
- - Maintain a helpful, polite, and professional business tone, reflecting the Futuresony brand and your identity as FutureAi.
788
- - Do NOT include raw tool call or result tags in your final answer.
789
- - If you were unable to gather necessary information, clearly state what you could and could not find.
790
-
791
- After your answer, generate 2-3 concise follow-up questions that might be helpful or relevant to the user based on the conversation and your response. List these questions clearly at the end.
792
- If Search Results were used, list the relevant URLs under a "Sources:" heading at the very end.
793
- </system>
794
- """
795
-
796
- # Summarize the gathered information by processing the model_chat_history
797
- gathered_info_summary = ""
798
- # unique_urls = set() # Moved initialization outside the if block # Commented out as initialization is now at the beginning of the function
799
-
800
- # Iterate through the model history to find user messages that followed an assistant message
801
- # These 'user' messages should contain the tool results block if tools were run.
802
- # We iterate up to the second-to-last message, as the *very* last message in history
803
- # will be the synthesis prompt itself, which hasn't been processed yet.
804
- for i in range(1, len(model_chat_history)):
805
- # Look for 'user' messages that follow an 'assistant' message
806
- if model_chat_history[i]['role'] == 'user' and isinstance(model_chat_history[i]['content'], str) and '<tool_results>' in model_chat_history[i]['content']:
807
- msg_content = model_chat_history[i]['content']
808
- # Check if it contains the tool results block
809
- tool_results_block = re.search(r'<tool_results>(.*?)</tool_results>', msg_content, re.DOTALL)
810
- if tool_results_block:
811
- content = tool_results_block.group(1) # Content inside <tool_results>
812
-
813
- # --- Extract and format info from tool result blocks ---
814
- search_blocks = re.findall(r'<search_results_for_query.*?>(.*?)</search_results_for_query>', content, re.DOTALL)
815
- for sr_content in search_blocks:
816
- query_match = re.search(r"query='(.*?)'", sr_content) # Extract query attribute
817
- query = query_match.group(1) if query_match else "Unknown"
818
- gathered_info_summary += f"Search results for '{query}':\n"
819
- items = re.findall(r'<item>(.*?)</item>', sr_content, re.DOTALL)
820
- if items:
821
- for item_content in items:
822
- title = re.search(r'<title>(.*?)</title>', item_content, re.DOTALL)
823
- snippet = re.search(r'<snippet>(.*?)</snippet>', item_content, re.DOTALL)
824
- url = re.search(r'<url>(.*?)</url>', item_content, re.DOTALL)
825
- title_text = title.group(1).strip() if title else 'N/A'
826
- snippet_text = snippet.group(1).strip() if snippet else 'N/A'
827
- url_text = url.group(1).strip() if url else 'N/A'
828
- gathered_info_summary += f"- Title: {title_text}, Snippet: {snippet_text}\n"
829
- if url_text and url_text != 'N/A':
830
- unique_urls.add(url_text) # Add URL to set
831
-
832
- elif "No results found" in sr_content:
833
- gathered_info_summary += "- No results found.\n"
834
- elif "<error>" in sr_content:
835
- error_text = re.search(r'<error>(.*?)</error>', sr_content, re.DOTALL)
836
- gathered_info_summary += f"- Error during search: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
837
-
838
-
839
- # Business lookup results (MODIFIED to handle MULTIPLE match tags)
840
- lookup_blocks = re.findall(r'<lookup_business_info_results_for_query.*?>(.*?)</lookup_business_info_results_for_query>', content, re.DOTALL)
841
- for lr_content in lookup_blocks:
842
- query_match = re.search(r"query='(.*?)'", lr_content)
843
- query = query_match.group(1) if query_match else "Unknown"
844
- # Extract requested_threshold, requested_max_matches, final_best_score
845
- req_thresh_match = re.search(r"requested_threshold='(.*?)'", lr_content)
846
- req_thresh = float(req_thresh_match.group(1)) if req_thresh_match else 0.50
847
- req_max_matches_match = re.search(r"requested_max_matches='(.*?)'", lr_content)
848
- req_max_matches = int(req_max_matches_match.group(1)) if req_max_matches_match else 5
849
- final_best_score_match = re.search(r"final_best_score='(.*?)'", lr_content)
850
- final_best_score = float(final_best_score_match.group(1)) if final_best_score_match else 0.0
851
-
852
-
853
- gathered_info_summary += f"Business lookup results for '{query}' (Requested Threshold: {req_thresh:.4f}, Requested Max Matches: {req_max_matches}, Final Best Score: {final_best_score:.4f}):\n"
854
-
855
- matches_found = re.findall(r'<match>(.*?)</match>', lr_content, re.DOTALL) # Find ALL match tags
856
- if matches_found:
857
- gathered_info_summary += f" Found {len(matches_found)} relevant item(s):\n"
858
- for match_content in matches_found: # Iterate through each match
859
- service = re.search(r'<service>(.*?)</service>', match_content, re.DOTALL)
860
- description = re.search(r'<description>(.*?)</description>', match_content, re.DOTALL)
861
- price = re.search(r'<price>(.*?)</price>', match_content, re.DOTALL)
862
- available = re.search(r'<available>(.*?)</available>', match_content, re.DOTALL)
863
- # Add extraction for other fields if you include them in your tool output
864
- # contact_person = re.search(r'<contact_person>(.*?)</contact_person>', match_content, re.DOTALL)
865
-
866
- gathered_info_summary += f" - Service: {service.group(1).strip() if service else 'N/A'}\n"
867
- gathered_info_summary += f" Description: {description.group(1).strip() if description else 'N/A'}\n"
868
- gathered_info_summary += f" Price: {price.group(1).strip() if price else 'N/A'}\n"
869
- gathered_info_summary += f" Available: {available.group(1).strip() if available else 'N/A'}\n"
870
- # Add other fields here...
871
- # if contact_person: gathered_info_summary += f" Contact Person: {contact_person.group(1).strip()}\n"
872
-
873
- elif "No relevant matches found" in lr_content:
874
- score_match = re.search(r"final_best_score='(.*?)'", lr_content) # Look for final_best_score
875
- score = float(score_match.group(1)) if score_match else 0.0
876
- threshold_match = re.search(r"requested_threshold='(.*?)'", lr_content)
877
- threshold_val = float(threshold_match.group(1)) if threshold_match else 0.50
878
-
879
- gathered_info_summary += f" No relevant matches found above threshold {threshold_val:.4f} (best score: {score:.4f}).\n"
880
- elif "<error>" in lr_content:
881
- error_text = re.search(r'<error>(.*?)</error>', lr_content, re.DOTALL)
882
- gathered_info_summary += f" Error during business lookup: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
883
-
884
- # Include system notes found within the business lookup results block
885
- system_notes_in_lookup = re.findall(r'<system_note>(.*?)</system_note>', lr_content, re.DOTALL)
886
- for note in system_notes_in_lookup:
887
- gathered_info_summary += f" System Note within Lookup: {note.strip()}\n"
888
-
889
-
890
- # Date calculation results
891
- date_blocks = re.findall(r'<perform_date_calculation_results_for_query.*?>(.*?)</perform_date_calculation_results_for_query>', content, re.DOTALL)
892
- for dr_content in date_blocks:
893
- query_match = re.search(r"query='(.*?)'", dr_content)
894
- query = query_match.group(1) if query_match else "Unknown"
895
- gathered_info_summary += f"Date calculation results for '{query}':\n"
896
- date_val = re.search(r'<date>(.*?)</date>', dr_content, re.DOTALL)
897
- desc = re.search(r'<description>(.*?)</description>', dr_content, re.DOTALL)
898
- if date_val:
899
- gathered_info_summary += f"- Result: {date_val.group(1).strip()}\n"
900
- if desc: gathered_info_summary += f" Description: {desc.group(1).strip()}\n"
901
- elif desc:
902
- gathered_info_summary += f"- {desc.group(1).strip()}\n"
903
- elif "<error>" in dr_content:
904
- error_text = re.search(r'<error>(.*?)</error>', dr_content, re.DOTALL)
905
- gathered_info_summary += f"- Error during date calculation: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
906
- else:
907
- gathered_info_summary += "- No specific date result found.\n"
908
-
909
-
910
- # System Notes/Errors from Tool Execution (outside of specific tool blocks but within <tool_results>)
911
- system_notes_in_results_block = re.findall(r'<system_note>(.*?)</system_note>', content, re.DOTALL)
912
- for note in system_notes_in_results_block:
913
- # Add only if not already added from within a specific lookup block
914
- if f"System Note: {note.strip()}\n" not in gathered_info_summary and f"System Note within Lookup: {note.strip()}\n" not in gathered_info_summary:
915
- gathered_info_summary += f"System Note from Tool Results: {note.strip()}\n"
916
-
917
- system_errors_in_results_block = re.findall(r'<system_error>(.*?)</system_error>', content, re.DOTALL)
918
- for error_note in system_errors_in_results_block:
919
- gathered_info_summary += f"System Error from Tool Results: {error_note.strip()}\n"
920
-
921
- # Also check the raw model output (last assistant message) for system errors if tool results block wasn't generated
922
- last_assistant_message_content = model_chat_history[-1]['content'] if model_chat_history and model_chat_history[-1]['role'] == 'assistant' else ""
923
- system_errors_in_raw_output = re.findall(r'<system_error>(.*?)</system_error>', last_assistant_message_content, re.DOTALL)
924
- for error_note in system_errors_in_raw_output:
925
- # Add only if not already captured from within tool results block
926
- if f"System Error from Tool Results: {error_note.strip()}" not in gathered_info_summary:
927
- gathered_info_summary += f"System Error in model output: {error_note.strip()}\n"
928
-
929
- # Check for system notes/errors that might be outside <tool_results> but in the raw assistant output
930
- system_notes_in_raw_output = re.findall(r'<system_note>(.*?)</system_note>', last_assistant_message_content, re.DOTALL)
931
- for note in system_notes_in_raw_output:
932
- if f"System Note from Tool Results: {note.strip()}" not in gathered_info_summary and f"Business Lookup Note: {note.strip()}\n" not in gathered_info_summary: # Avoid duplicates
933
- gathered_info_summary += f"System Note in model output: {note.strip()}\n"
934
-
935
-
936
- if not gathered_info_summary.strip():
937
- gathered_info_summary = "No specific information was gathered using tools."
938
-
939
- # Add the synthesis prompt to the history for the final generation step
940
- # This keeps the history structure correct for apply_chat_template
941
- # IMPORTANT: This adds the synthesis prompt as the final USER message.
942
- # The model will then generate the final ASSISTANT response.
943
- temp_chat_history_for_synthesis = model_chat_history.copy() # Copy the history including tool results
944
-
945
- synthesis_prompt_formatted = synthesis_prompt_content.format(
946
- original_user_input=original_user_input,
947
- gathered_info_summary=gathered_info_summary.strip() # Add the summary of results
948
- )
949
-
950
- # Append the synthesis prompt as the final user message content
951
- # This maintains the user/assistant alternation (last was assistant, now user for synthesis instruction)
952
- temp_chat_history_for_synthesis.append({"role": "user", "content": synthesis_prompt_formatted.strip()})
953
-
954
- # Generate the final response using the history with the synthesis prompt
955
- print("Generating final synthesized response...")
956
- try:
957
- final_synthesis_response = client.chat_completion(
958
- messages=temp_chat_history_for_synthesis, # Use the history with the synthesis prompt
959
- max_tokens=1024, # Allow reasonable tokens for synthesis
960
- stream=False,
961
- temperature=0.5, # Allow a bit more creativity for synthesis
962
- top_p=0.95,
963
- )
964
-
965
- if final_synthesis_response and final_synthesis_response.choices:
966
- final_response_text = final_synthesis_response.choices[0].message.content.strip()
967
- print(f"Synthesized response generated: {final_response_text[:100]}...")
968
- else:
969
- final_response_text = "I was unable to generate a comprehensive answer based on the information gathered."
970
- print("Synthesis generation failed.")
971
- except Exception as e:
972
- print(f"Error during synthesis generation: {e}")
973
- print(traceback.format_exc())
974
- final_response_text = f"An error occurred while synthesizing the response: {e}"
975
-
976
-
977
- # Add Sources if any URLs were collected
978
- if unique_urls:
979
- final_response_text += "\n\nSources:\n" + "\n".join(sorted(list(unique_urls))) # Sort URLs alphabetically
980
-
981
-
982
- # Update the last message in chat_history_state with the final response
983
- # Find the last turn in the original chat_history_state (which was already updated with the placeholder)
984
- if chat_history_state and len(chat_history_state) > 0:
985
- chat_history_state[-1][1] = final_response_text # Update the bot's message in the last turn
986
-
987
- # Remove the [[TEXT]] marker from the chat_history_state *before* yielding
988
- # Iterate through the history and clean each item
989
- cleaned_chat_history_list_of_lists = []
990
- for user_msg, bot_msg in chat_history_state:
991
- cleaned_user_msg = user_msg.replace('[[TEXT]] ', '') if isinstance(user_msg, str) else user_msg
992
- cleaned_bot_msg = bot_msg.replace('[[TEXT]] ', '') if isinstance(bot_msg, str) else bot_msg
993
- cleaned_chat_history_list_of_lists.append([cleaned_user_msg, cleaned_bot_msg])
994
-
995
- # Convert the cleaned list of lists to a list of tuples as required by Gradio
996
- cleaned_chat_history_list_of_tuples = [(user_msg, bot_msg) for user_msg, bot_msg in cleaned_chat_history_list_of_lists]
997
-
998
-
999
- print(f"\n--- Final Response: {final_response_text[:100]}... ---") # Debug Print
1000
-
1001
- # Yield the cleaned history as a list of tuples
1002
- yield cleaned_chat_history_list_of_tuples # Yield the cleaned history as tuples
1003
 
 
 
 
1004
 
1005
- # ──────────────────────────
1006
- # 3 Gradio interface
1007
- # ──────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008
 
1009
- # The Gradio interface definition remains the same as it correctly
1010
- # uses the updated respond function.
1011
 
 
1012
# Report the RAG flag as it stands at this point of module execution.
print(f"RAG functionality available: {business_info_available}")

# Chat UI definition. The extra inputs are, in order: a hidden system-message
# textbox, the max-new-tokens slider, the temperature slider and the top-p slider.
demo = gr.ChatInterface(
    fn=chat_with_bot,
    additional_inputs=[
        # Hidden from the UI (visible=False); per its label the system prompt
        # is built internally for tool use.
        gr.Textbox(
            value="You are FutureAi, a helpful, polite, and professional assistant for Futuresony. Your primary goal is to assist the user by effectively using the available tools or answering directly based on the conversation history and tool outputs. Maintain a positive and helpful tone. If you are unsure or a tool returns no clear results, state this gracefully. When providing answers based on gathered information, aim for a comprehensive and detailed response, synthesizing all relevant points from the tool outputs.",
            label="System message (Note: This is less critical now as the system prompt is built internally for tool use.)",
            visible=False,
        ),
        gr.Slider(
            1,
            4096,
            value=1024,
            step=1,
            label="Max new tokens (Note: Affects tool output processing and final answer length)",
        ),
        gr.Slider(
            0.1,
            4.0,
            value=0.7,
            step=0.1,
            label="Temperature (Note: Affects model's creativity, keep lower for reliable tool use)",
        ),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p (nucleus sampling)"),
    ],
    chatbot=gr.Chatbot(height=500),
    title="Gemma‑2‑9B‑IT Chat with RAG and Tool Use",
    description="Chat with Google Gemma‑2‑9B‑IT via Hugging Face Inference API, with business info retrieved from Google Sheets and external search capabilities.",
    submit_btn="Send",
    # NOTE: no clear_btn is passed; an earlier comment reports it raised a
    # TypeError in the author's environment.
)

# Enable request queueing.
demo.queue()

if __name__ == "__main__":
    # Authenticate and load the sheet data before the UI is launched.
    if not authenticate_google_sheets():
        print("Google Sheets authentication failed. RAG functionality will not be available.")
    else:
        load_business_info()

    # Report the flag again now that loading has been attempted.
    print(f"RAG functionality available: {business_info_available}")

    demo.launch(debug=True)
 
 
 
1
+ # This script combines all components for deployment on Hugging Face Spaces.
 
2
 
3
+ # --- Imports ---
4
  import os
5
  import gradio as gr
6
  from huggingface_hub import InferenceClient
 
12
  from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
13
  from sentence_transformers import SentenceTransformer, util, CrossEncoder
14
  import gspread
15
+ # from google.colab import auth # Colab specific, remove for HF Spaces
16
+ # from google.auth import default # Colab specific, remove for HF Spaces
17
  from tqdm import tqdm
18
  from duckduckgo_search import DDGS
19
  import spacy
20
  from datetime import date, timedelta
21
+ from dateutil.relativedelta import relativedelta
22
+ import traceback
23
+ import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # Suppress warnings
26
  warnings.filterwarnings("ignore", category=UserWarning)
27
 
28
# --- Global Variables and Secrets ---
# All three settings are read from environment variables; on Hugging Face
# Spaces they are expected to be configured as Space Secrets.
HF_TOKEN = os.getenv("HF_TOKEN")
SHEET_ID = os.getenv("SHEET_ID")  # ID of the Google Sheet holding the business data
GOOGLE_BASE64_CREDENTIALS = os.getenv("GOOGLE_BASE64_CREDENTIALS")

# --- Model and Tool Initialization ---
# Every handle defaults to None so the rest of the module can test whether the
# component is available. Each component is initialised in its own try block:
# a failure while loading one of them must not prevent the others from loading.
client = None
nlp = None
embedder = None
reranker = None

# Initialize InferenceClient (only possible when a token is configured).
if HF_TOKEN:
    try:
        client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
        print("Hugging Face Inference Client initialized.")
    except Exception as e:
        print(f"An error occurred during model/tool initialization: {e}")
        print(traceback.format_exc())
else:
    print("Warning: HF_TOKEN not found. Inference Client not initialized.")

# Load spacy model for sentence splitting; install it on the fly if missing.
try:
    nlp = spacy.load("en_core_web_sm")
    print("SpaCy model 'en_core_web_sm' loaded.")
except OSError:
    print("SpaCy model 'en_core_web_sm' not found. Downloading...")
    try:
        import subprocess
        import sys

        # Argument list (no shell) and check=True so that a failed install is
        # reported here instead of being silently ignored.
        subprocess.run(
            [
                sys.executable,
                "-m",
                "pip",
                "install",
                "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz",
            ],
            check=True,
        )
        nlp = spacy.load("en_core_web_sm")
        print("SpaCy model 'en_core_web_sm' downloaded and loaded.")
    except Exception as e:
        print(f"Failed to download or load SpaCy model: {e}")
except Exception as e:
    print(f"An error occurred during model/tool initialization: {e}")
    print(traceback.format_exc())

# Load SentenceTransformer for RAG/business info retrieval.
try:
    print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
    embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
    print("Sentence Transformer loaded.")
except Exception as e:
    print(f"An error occurred during model/tool initialization: {e}")
    print(traceback.format_exc())

# Load a Cross-Encoder model for re-ranking retrieved documents.
try:
    print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
    reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
    print("Cross-Encoder Reranker loaded.")
except Exception as e:
    print(f"An error occurred during model/tool initialization: {e}")
    print(traceback.format_exc())
 
76
 
77
 
78
+ # --- Google Sheets Authentication ---
79
  gc = None # Global variable for gspread client
80
  def authenticate_google_sheets():
81
  """Authenticates with Google Sheets using base64 encoded credentials."""
 
83
  print("Authenticating Google Account...")
84
  if not GOOGLE_BASE64_CREDENTIALS:
85
  print("Error: GOOGLE_BASE64_CREDENTIALS secret not found.")
86
+ print("Please add GOOGLE_BASE64_CREDENTIALS as a Space Secret.")
87
  return False
88
 
89
  try:
 
101
  print(traceback.format_exc())
102
  return False
103
 
104
+ # --- Google Sheets Data Loading and Embedding ---
105
+ data = [] # Global variable to store loaded data
 
106
  descriptions_for_embedding = []
107
  embeddings = torch.tensor([])
108
  business_info_available = False # Flag to indicate if business info was loaded successfully
109
 
110
  def load_business_info():
111
  """Loads business information from Google Sheet and creates embeddings."""
112
+ global data, descriptions_for_embedding, embeddings, business_info_available
113
  business_info_available = False # Reset flag
114
 
115
  if gc is None:
 
118
 
119
  if not SHEET_ID:
120
  print("Error: SHEET_ID not set.")
121
+ print("Please add SHEET_ID as a Space Secret.")
122
  return
123
 
124
  try:
 
128
 
129
  if not data_records:
130
  print(f"Warning: No data records found in Google Sheet with ID: {SHEET_ID}")
131
+ data = []
132
  descriptions_for_embedding = []
133
  else:
134
  # Filter out rows missing 'Service' or 'Description'
135
  filtered_data = [row for row in data_records if row.get('Service') and row.get('Description')]
136
  if not filtered_data:
137
  print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
138
+ data = []
139
  descriptions_for_embedding = []
140
  else:
141
+ data = filtered_data
 
142
  descriptions_for_embedding = [f"Service: {row['Service']}. Description: {row['Description']}" for row in data]
143
 
 
144
  if descriptions_for_embedding and embedder is not None:
145
  print("Encoding descriptions...")
146
  try:
 
149
  business_info_available = True # Set flag if successful
150
  except Exception as e:
151
  print(f"Error during description encoding: {e}")
152
+ embeddings = torch.tensor([])
153
  business_info_available = False # Encoding failed
154
  else:
155
  print("Skipping encoding descriptions: No descriptions found or embedder not available.")
156
+ embeddings = torch.tensor([])
157
+ business_info_available = False
158
 
159
  print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
160
  if not business_info_available:
 
163
  except gspread.exceptions.SpreadsheetNotFound:
164
  print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
165
  print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
166
+ business_info_available = False
167
  except Exception as e:
168
  print(f"An error occurred while accessing the Google Sheet: {e}")
169
  print(traceback.format_exc())
170
+ business_info_available = False
171
 
172
+ # --- Business Info Retrieval (RAG) ---
173
def retrieve_business_info(query: str, top_n: int = 3) -> list:
    """
    Retrieve the loaded business-info rows most relevant to ``query``.

    The query is embedded with the module-level ``embedder`` and compared by
    cosine similarity against the precomputed ``embeddings``. The best
    ``top_n`` rows of ``data`` are selected and, when a ``reranker`` is
    loaded, re-ordered by its scores on (query, description) pairs.

    Args:
        query: Text to search the business information for.
        top_n: Maximum number of rows to return.

    Returns:
        A list of rows taken from ``data``, best match first. The list is
        empty when retrieval is unavailable, when ``top_n`` is not positive,
        or when an error occurs during retrieval.
    """
    if not business_info_available or embedder is None or not descriptions_for_embedding or not data:
        print("Business information retrieval is not available or data is empty.")
        return []

    # Nothing was requested; avoid handing a non-positive k to torch.topk.
    if top_n <= 0:
        return []

    try:
        query_embedding = embedder.encode(query, convert_to_tensor=True)
        cosine_scores = util.cos_sim(query_embedding, embeddings)[0]

        # Never ask for more candidates than there are rows or scores.
        k = min(top_n, len(data), cosine_scores.shape[0])
        candidate_indices = torch.topk(cosine_scores, k=k)[1].tolist()
        candidates = [data[i] for i in candidate_indices]

        if reranker is None or not candidates:
            return candidates

        print("Re-ranking top results...")
        rerank_pairs = [(query, descriptions_for_embedding[i]) for i in candidate_indices]
        rerank_scores = reranker.predict(rerank_pairs)
        # Positions into `candidates`, ordered by descending re-rank score.
        order = sorted(range(len(rerank_scores)), key=lambda pos: rerank_scores[pos], reverse=True)
        reranked_results = [candidates[pos] for pos in order]
        print("Re-ranking complete.")
        return reranked_results

    except Exception as e:
        # Best-effort tool: log the failure and report "nothing found".
        print(f"Error during business information retrieval: {e}")
        print(traceback.format_exc())
        return []
 
203
 
204
+ # --- Tool Functions ---
205
  # Function to perform DuckDuckGo Search and return results with URLs
206
def perform_duckduckgo_search(query: str, max_results: int = 5):
    """
    Run a DuckDuckGo text search and return the raw result entries.

    Args:
        query: Search text. Empty queries and queries with fewer than two
            whitespace-separated words are rejected without sleeping or
            contacting the search service.
        max_results: Maximum number of results requested from DuckDuckGo.

    Returns:
        A list of the entries yielded by ``DDGS.text``. The list is empty
        when the query is skipped, when nothing is found, or when the search
        raises an exception.
    """
    print(f"Executing Tool: perform_duckduckgo_search with query='{query}')")

    # Validate before doing any work: a rejected query should cost neither the
    # one-second pause nor a DDGS session.
    if not query or len(query.split()) < 2:
        print(f"Skipping search for short query: '{query}'")
        return []

    try:
        time.sleep(1)  # Sleep for 1 second before each real search
        with DDGS() as ddgs:
            search_results_list = list(ddgs.text(query, max_results=max_results))
    except Exception as e:
        # Best-effort tool: log the failure and report "nothing found".
        print(f"Error during Duckduckgo search for '{query}': {e}")
        return []

    if not search_results_list and max_results > 0:
        print(f"DuckDuckGo search for '{query}' returned no results.")

    return search_results_list
231
 
232
  # Function to perform date calculation if needed
 
234
  """
235
  Analyzes query for date calculation requests and performs the calculation.
236
  Returns a dict describing the calculation and result, or None.
 
 
237
  """
238
  print(f"Executing Tool: perform_date_calculation with query='{query}')")
239
  query_lower = query.lower()
 
292
  return {"query": query, "description": desc, "result": None, "success": False}
293
 
294
 
295
+ # --- Tool Definitions and System Prompt ---
 
 
296
  TOOL_DEFINITIONS = """
297
  Available tools:
298
  1. **search**: Use this tool to perform a web search for current external information. Useful for facts, news, weather, etc.
 
316
 
317
  """
318
 
 
 
 
 
319
  tool_use_system_template = """<system>
320
  You are FutureAi, a helpful, polite, and professional assistant for Futuresony. Your primary goal is to assist the user by effectively using the available tools or answering directly based on the conversation history and tool outputs. Maintain a positive and helpful tone. If you are unsure or a tool returns no clear results, state this gracefully. When providing answers based on gathered information, aim for a comprehensive and detailed response, synthesizing all relevant points from the tool outputs.
321
 
 
356
  MAX_HISTORY_TURNS = 5 # Keep last 5 turns
357
 
358
 
359
+ # --- Chat Handler ---
360
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a chat completion for *message*, augmented with business info.

    This is a generator: it yields the accumulated response text after each
    streamed chunk, which is the shape a streaming Gradio chat handler uses.

    Args:
        message: The user's current message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts.
        system_message: System prompt placed at the start of the conversation.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature forwarded to the inference client.
        top_p: Nucleus-sampling threshold forwarded to the inference client.

    Yields:
        The response text accumulated so far, or a single error string if the
        client is missing or the completion call fails.
    """
    if client is None:
        yield "Error: Hugging Face Inference Client not initialized. Please check your HF_TOKEN Space Secret."
        return

    # Retrieve relevant business information based on the user's message.
    retrieved_info = retrieve_business_info(message)

    # The retrieved context is folded into the system message rather than sent
    # as an extra "user" turn: a separate user message here would be followed
    # directly by another user message, and chat templates that enforce strict
    # user/assistant alternation reject such conversations.
    system_content = system_message
    if retrieved_info:
        system_content = f"{system_message}\n\n{_format_business_context(retrieved_info)}"
        print("Added retrieved business info to messages.")

    messages = [{"role": "system", "content": system_content}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Stream tokens, yielding the growing response after every chunk.
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some streamed chunks may carry no choices; skip them instead of
            # failing on choices[0].
            if not chunk.choices:
                continue
            response += chunk.choices[0].delta.content or ""
            yield response
    except Exception as e:
        # Top-level boundary for the UI: log the full traceback and surface a
        # readable message instead of crashing the chat session.
        print(f"Error during chat completion: {e}")
        print(traceback.format_exc())
        yield f"An error occurred: {e}"


def _format_business_context(retrieved_info):
    """Render retrieved business-info records as one plain-text context block."""
    parts = [
        "Use the following business information to help answer the user's question if relevant:\n"
    ]
    for index, info in enumerate(retrieved_info, start=1):
        parts.append(f"--- Business Info Entry {index} ---\n")
        parts.extend(f"{key}: {value}\n" for key, value in info.items())
        parts.append("---\n")
    return "".join(parts)


def _history_to_messages(history):
    """Convert chat history (pairs or role/content dicts) to chat messages."""
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages"-style history entry; keep only plain-text chat turns.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                converted.append({"role": role, "content": content})
            continue
        user_msg, bot_msg = turn
        if user_msg:
            converted.append({"role": "user", "content": user_msg})
        if bot_msg:
            converted.append({"role": "assistant", "content": bot_msg})
    return converted
418
 
 
 
419
 
420
# --- Gradio interface ---
# NOTE(review): this runs at import time, before the __main__ block below
# authenticates and loads the sheet, so it presumably reports the initial
# value of the flag — confirm against where business_info_available is set.
print(f"RAG functionality available: {business_info_available}")

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        # Read-only system prompt, rendered once from the tool-use template
        # with the date at the time this module is executed.
        gr.Textbox(
            value=tool_use_system_template.format(
                current_date=date.today().strftime('%Y-%m-%d'),
                tool_definitions=TOOL_DEFINITIONS,
            ),
            label="System message",
            interactive=False,
        ),
        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
        # Capped at 2.0: the chat-completion API documents temperature in the
        # range [0, 2], so higher slider values could only produce errors.
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p (nucleus sampling)"),
    ],
    title="Gemma‑2‑9B‑IT Chat with RAG and Tools",
    description="Chat with Google Gemma‑2‑9B‑IT via Hugging Face Inference API, with business info retrieved from Google Sheets and external search/date tools.",
)

# Enable request queueing (concurrency handled automatically on Gradio ≥ 4)
demo.queue()

if __name__ == "__main__":
    # Authenticate and load the business data before launching the demo;
    # these steps only happen when the file is run as a script.
    if authenticate_google_sheets():
        load_business_info()
    else:
        print("Google Sheets authentication failed. RAG functionality will not be available.")

    print(f"RAG functionality available: {business_info_available}")

    # Launch the Gradio interface (pass debug=True here only during development).
    demo.launch()