Update app.py

app.py CHANGED
@@ -1,7 +1,6 @@
-# This
-# It includes all the code from the previous successful steps.
+# This script combines all components for deployment on Hugging Face Spaces.
 
-#
+# --- Imports ---
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -13,80 +12,70 @@ import json
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer, util, CrossEncoder
 import gspread
-# from google.colab import auth
-from google.auth import default
+# from google.colab import auth  # Colab specific, remove for HF Spaces
+# from google.auth import default  # Colab specific, remove for HF Spaces
 from tqdm import tqdm
 from duckduckgo_search import DDGS
 import spacy
 from datetime import date, timedelta
-from dateutil.relativedelta import relativedelta
-import traceback
-import base64
-
-
-# Add PyTorch version and CUDA availability check
-print(f"PyTorch version: {torch.__version__}")
-print(f"Is CUDA available: {torch.cuda.is_available()}")
-if torch.cuda.is_available():
-    print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
-
-# Optional: Add check for torchvision version if installed
-try:
-    import torchvision
-    print(f"Torchvision version: {torchvision.__version__}")
-except ImportError:
-    print("Torchvision not installed.")
+from dateutil.relativedelta import relativedelta
+import traceback
+import base64
 
 # Suppress warnings
 warnings.filterwarnings("ignore", category=UserWarning)
 
-#
+# --- Global Variables and Secrets ---
+# HF_TOKEN is automatically available in HF Spaces secrets
 HF_TOKEN = os.getenv("HF_TOKEN")
-
+# GOOGLE_BASE64_CREDENTIALS should be added as a Space Secret
+SHEET_ID = os.getenv("SHEET_ID")  # Get SHEET_ID from Space Secrets
 GOOGLE_BASE64_CREDENTIALS = os.getenv("GOOGLE_BASE64_CREDENTIALS")
 
-#
-client =
-
-# Load spacy model for sentence splitting
+# --- Model and Tool Initialization ---
+client = None  # Initialize after HF_TOKEN is confirmed available
 nlp = None
+embedder = None
+reranker = None
+
 try:
-
-
-
-
+    # Initialize InferenceClient
+    if HF_TOKEN:
+        client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
+        print("Hugging Face Inference Client initialized.")
+    else:
+        print("Warning: HF_TOKEN not found. Inference Client not initialized.")
+
+    # Load spacy model for sentence splitting
     try:
-        os.system("python -m spacy download en_core_web_sm")
         nlp = spacy.load("en_core_web_sm")
-        print("SpaCy model 'en_core_web_sm'
-    except
-        print(
-
+        print("SpaCy model 'en_core_web_sm' loaded.")
+    except OSError:
+        print("SpaCy model 'en_core_web_sm' not found. Downloading...")
+        try:
+            # Use pip for installation in HF Spaces environment
+            os.system("pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz")
+            nlp = spacy.load("en_core_web_sm")
+            print("SpaCy model 'en_core_web_sm' downloaded and loaded.")
+        except Exception as e:
+            print(f"Failed to download or load SpaCy model: {e}")
 
-# Load SentenceTransformer for RAG/business info retrieval
-embedder = None
-try:
+    # Load SentenceTransformer for RAG/business info retrieval
     print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
     embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
     print("Sentence Transformer loaded.")
-except Exception as e:
-    print(f"Error loading Sentence Transformer: {e}")
-
 
-# Load a Cross-Encoder model for re-ranking retrieved documents
-reranker = None
-try:
+    # Load a Cross-Encoder model for re-ranking retrieved documents
     print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
     reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
     print("Cross-Encoder Reranker loaded.")
+
 except Exception as e:
-    print(f"
-    print("Please ensure the model identifier 'cross-encoder/ms-marco-MiniLM-L6-v2' is correct and accessible on Hugging Face Hub.")
+    print(f"An error occurred during model/tool initialization: {e}")
     print(traceback.format_exc())
-    reranker = None
 
 
-# Google Sheets Authentication
+# --- Google Sheets Authentication ---
 gc = None  # Global variable for gspread client
 def authenticate_google_sheets():
     """Authenticates with Google Sheets using base64 encoded credentials."""
@@ -94,6 +83,7 @@ def authenticate_google_sheets():
     print("Authenticating Google Account...")
     if not GOOGLE_BASE64_CREDENTIALS:
         print("Error: GOOGLE_BASE64_CREDENTIALS secret not found.")
+        print("Please add GOOGLE_BASE64_CREDENTIALS as a Space Secret.")
         return False
 
     try:
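The decode-and-authorize body of this try block falls outside the hunk. A minimal sketch of the usual pattern, assuming the secret holds a base64-encoded service-account JSON (gspread.service_account_from_dict is the standard entry point; the helper name is ours):

    import base64
    import json

    import gspread

    def gspread_client_from_base64(encoded_credentials: str) -> gspread.Client:
        # Decode the Space Secret back into the service-account JSON dict
        credentials_dict = json.loads(base64.b64decode(encoded_credentials))
        # Build an authorized gspread client from the decoded credentials
        return gspread.service_account_from_dict(credentials_dict)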
@@ -111,16 +101,15 @@ def authenticate_google_sheets():
         print(traceback.format_exc())
         return False
 
-# Google Sheets Data Loading and Embedding
-
-data = []  # Global variable to store loaded data - Renamed to 'data'
+# --- Google Sheets Data Loading and Embedding ---
+data = []  # Global variable to store loaded data
 descriptions_for_embedding = []
 embeddings = torch.tensor([])
 business_info_available = False  # Flag to indicate if business info was loaded successfully
 
 def load_business_info():
     """Loads business information from Google Sheet and creates embeddings."""
-    global data, descriptions_for_embedding, embeddings, business_info_available
+    global data, descriptions_for_embedding, embeddings, business_info_available
     business_info_available = False  # Reset flag
 
     if gc is None:
@@ -129,6 +118,7 @@ def load_business_info():
 
     if not SHEET_ID:
         print("Error: SHEET_ID not set.")
+        print("Please add SHEET_ID as a Space Secret.")
         return
 
     try:
@@ -138,21 +128,19 @@ def load_business_info():
 
         if not data_records:
             print(f"Warning: No data records found in Google Sheet with ID: {SHEET_ID}")
-            data = []
+            data = []
             descriptions_for_embedding = []
         else:
             # Filter out rows missing 'Service' or 'Description'
             filtered_data = [row for row in data_records if row.get('Service') and row.get('Description')]
             if not filtered_data:
                 print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
-                data = []
+                data = []
                 descriptions_for_embedding = []
             else:
-                data = filtered_data
-                # Use BOTH Service and Description for embedding
+                data = filtered_data
                 descriptions_for_embedding = [f"Service: {row['Service']}. Description: {row['Description']}" for row in data]
 
-        # Only encode if descriptions_for_embedding are found and embedder is available
         if descriptions_for_embedding and embedder is not None:
             print("Encoding descriptions...")
             try:
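The encode call itself sits in the elided lines at the top of the next hunk. A standalone sketch of that step (the sample row is invented; convert_to_tensor=True matters because util.cos_sim downstream expects tensors):

    from sentence_transformers import SentenceTransformer

    embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
    descriptions = ["Service: Web design. Description: Responsive business websites."]
    # Yields a torch.Tensor of shape (len(descriptions), 384) for this MiniLM model
    embeddings = embedder.encode(descriptions, convert_to_tensor=True)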
@@ -161,12 +149,12 @@ def load_business_info():
                 business_info_available = True  # Set flag if successful
             except Exception as e:
                 print(f"Error during description encoding: {e}")
-                embeddings = torch.tensor([])
+                embeddings = torch.tensor([])
                 business_info_available = False  # Encoding failed
         else:
             print("Skipping encoding descriptions: No descriptions found or embedder not available.")
-            embeddings = torch.tensor([])
-            business_info_available = False
+            embeddings = torch.tensor([])
+            business_info_available = False
 
         print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
         if not business_info_available:
@@ -175,88 +163,45 @@ def load_business_info():
     except gspread.exceptions.SpreadsheetNotFound:
         print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
         print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
-        business_info_available = False
+        business_info_available = False
     except Exception as e:
         print(f"An error occurred while accessing the Google Sheet: {e}")
         print(traceback.format_exc())
-        business_info_available = False
+        business_info_available = False
 
-# Business Info Retrieval (RAG)
-def retrieve_business_info(query: str,
+# --- Business Info Retrieval (RAG) ---
+def retrieve_business_info(query: str, top_n: int = 3) -> list:
     """
     Retrieves relevant business information from loaded data based on a query.
-
-    Args:
-        query: The user's query string.
-        threshold: Minimum relevance score for a match.
-        max_matches: The maximum number of top relevant entries to retrieve *before* thresholding.
-
-    Returns:
-        A tuple containing:
-        - A list of dictionaries, where each dictionary is a relevant row from the
-          Google Sheet data that meets the threshold.
-        - The score of the best match found (even if below threshold).
-        Returns an empty list and score 0.0 if RAG is not available or
-        no relevant information is found.
     """
     global data
     if not business_info_available or embedder is None or not descriptions_for_embedding or not data:
         print("Business information retrieval is not available or data is empty.")
-        return []
 
     try:
         query_embedding = embedder.encode(query, convert_to_tensor=True)
         cosine_scores = util.cos_sim(query_embedding, embeddings)[0]
 
-
-
-
-        descriptions_pre_rerank = [descriptions_for_embedding[i] for i in top_n_indices_pre_rerank]
-
-        best_score_overall = torch.max(cosine_scores).item() if len(cosine_scores) > 0 else 0.0
-
-
-        # Optional: Re-rank the top results using the Cross-Encoder
-        reranked_results_with_scores = []
-        if reranker is not None and top_n_pre_rerank_results:
-            print("Re-ranking top results before thresholding...")
-            rerank_pairs = [(query, desc) for desc in descriptions_pre_rerank]
             rerank_scores = reranker.predict(rerank_pairs)
-
-            paired_results_with_scores = list(zip(top_n_pre_rerank_results, rerank_scores))
-            reranked_paired_results = sorted(paired_results_with_scores, key=lambda item: item[1], reverse=True)
-
             print("Re-ranking complete.")
-
-            filtered_results = []
-            for item, score in reranked_paired_results:
-                if score >= threshold:
-                    filtered_results.append(item)
-                    if len(filtered_results) >= max_matches:  # Apply max_matches here
-                        break
-            return filtered_results, best_score_overall
-
         else:
-            print("Reranker not available or no results to rerank. Applying threshold and max_matches to cosine scores.")
-            filtered_results = []
-            sorted_cosine_scores, sorted_indices = torch.sort(cosine_scores, descending=True)
-            for i in range(min(max_matches, len(data))):  # Take top N based on cosine
-                if sorted_cosine_scores[i].item() >= threshold:  # Check threshold
-                    filtered_results.append(data[sorted_indices[i].item()])
-                else:
-                    break  # Stop if score drops below threshold
-
-            return filtered_results, best_score_overall
-
 
     except Exception as e:
         print(f"Error during business information retrieval: {e}")
         print(traceback.format_exc())
-        return []
-
 
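The pre-selection that produced top_n_indices_pre_rerank and top_n_pre_rerank_results was lost in extraction. A self-contained sketch of the same retrieve-then-rerank shape, assuming torch.topk for the pre-selection (sample documents are invented; as in the removed code, the threshold is applied to the cross-encoder scores):

    import torch
    from sentence_transformers import SentenceTransformer, util, CrossEncoder

    embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
    reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

    docs = ["Service: Hosting. Description: Managed web hosting.",
            "Service: SEO. Description: Search engine optimization audits."]
    doc_embeddings = embedder.encode(docs, convert_to_tensor=True)

    def retrieve_and_rerank(query, threshold=0.5, max_matches=3, pre_rerank_n=10):
        query_embedding = embedder.encode(query, convert_to_tensor=True)
        cosine_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
        # Pre-select the top-N candidates by cosine similarity (the elided step)
        top_scores, top_indices = torch.topk(cosine_scores, k=min(pre_rerank_n, len(docs)))
        candidates = [docs[i] for i in top_indices.tolist()]
        # Re-score each (query, candidate) pair with the cross-encoder
        rerank_scores = reranker.predict([(query, c) for c in candidates])
        ranked = sorted(zip(candidates, rerank_scores), key=lambda p: p[1], reverse=True)
        # Keep at most max_matches results whose rerank score clears the threshold
        return [c for c, s in ranked if s >= threshold][:max_matches]

    print(retrieve_and_rerank("who can host my website?"))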
 # Function to perform DuckDuckGo Search and return results with URLs
 def perform_duckduckgo_search(query: str, max_results: int = 5):
     """
@@ -267,29 +212,21 @@ def perform_duckduckgo_search(query: str, max_results: int = 5):
     print(f"Executing Tool: perform_duckduckgo_search with query='{query}')")
     search_results_list = []
     try:
-        # Add a delay before each search
         time.sleep(1)  # Sleep for 1 second
-
         with DDGS() as ddgs:
             if not query or len(query.split()) < 2:
                 print(f"Skipping search for short query: '{query}'")
                 return []
-
-            # Use text() method for general text search
             results_generator = ddgs.text(query, max_results=max_results)
             results_found = False
             for r in results_generator:
                 search_results_list.append(r)
                 results_found = True
-
             if not results_found and max_results > 0:
                 print(f"DuckDuckGo search for '{query}' returned no results.")
-
-
     except Exception as e:
         print(f"Error during Duckduckgo search for '{query}': {e}")
         return []
-
     return search_results_list
 
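For reference, the duckduckgo_search call this function wraps, used standalone (each result dict carries the 'title', 'body', and 'href' keys the tool loop reads later):

    from duckduckgo_search import DDGS

    with DDGS() as ddgs:
        for result in ddgs.text("Hugging Face Spaces Gradio", max_results=3):
            print(result["title"], "->", result["href"])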
 # Function to perform date calculation if needed
@@ -297,8 +234,6 @@ def perform_date_calculation(query: str):
     """
     Analyzes query for date calculation requests and performs the calculation.
     Returns a dict describing the calculation and result, or None.
-    Handles formats like 'X days ago', 'X days from now', 'X weeks ago', 'X weeks from now', 'what is today's date'.
-    Uses dateutil for slightly more flexibility (though core logic remains simple).
     """
     print(f"Executing Tool: perform_date_calculation with query='{query}')")
     query_lower = query.lower()
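The parsing body of perform_date_calculation (old lines 305-356) falls between hunks and is not shown. A hedged sketch of the "X days/weeks ago or from now" handling the docstring describes, using the file's own date and relativedelta imports (the regex and wording are illustrative, not the file's exact logic):

    import re
    from datetime import date
    from dateutil.relativedelta import relativedelta

    def simple_date_calculation(query: str):
        m = re.search(r"(\d+)\s+(day|week|month)s?\s+(ago|from now)", query.lower())
        if not m:
            return None
        amount, unit, direction = int(m.group(1)), m.group(2), m.group(3)
        delta = relativedelta(**{unit + "s": amount})
        result = date.today() - delta if direction == "ago" else date.today() + delta
        return {"query": query, "description": f"{amount} {unit}s {direction}",
                "result": result.isoformat(), "success": True}

    print(simple_date_calculation("what was the date 3 weeks ago?"))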
@@ -357,9 +292,7 @@ def perform_date_calculation(query: str):
         return {"query": query, "description": desc, "result": None, "success": False}
 
 
-# --- Tool Definitions
-# Describe the tools available to the model in a structured format
-# This will be injected into the prompt.
 TOOL_DEFINITIONS = """
 Available tools:
 1. **search**: Use this tool to perform a web search for current external information. Useful for facts, news, weather, etc.
@@ -383,10 +316,6 @@ Available tools:
 
 """
 
-# --- System Prompt Template for Tool Use ---
-# This template instructs the model on how to use the tools and format its output.
-# Inject this *within* the user message content.
-# MODIFIED to ask for COMPREHENSIVE answers
 tool_use_system_template = """<system>
 You are FutureAi, a helpful, polite, and professional assistant for Futuresony. Your primary goal is to assist the user by effectively using the available tools or answering directly based on the conversation history and tool outputs. Maintain a positive and helpful tone. If you are unsure or a tool returns no clear results, state this gracefully. When providing answers based on gathered information, aim for a comprehensive and detailed response, synthesizing all relevant points from the tool outputs.
 
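Judging from the parsing code later in this diff (re.findall over <tool_code> tags, then json.loads expecting 'tool_name' and 'parameters'), a well-formed call under this template looks like the following; the query value is invented:

    <tool_code>
    {"tool_name": "search", "parameters": {"query": "weather in Dar es Salaam today", "max_results": 5}}
    </tool_code>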
@@ -427,616 +356,95 @@ Output ONLY tool calls within <tool_code> tags or a final answer using the 'answ
 MAX_HISTORY_TURNS = 5  # Keep last 5 turns
 
 
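The next run of removed lines lost its content in extraction; judging from the names used in the body (user_input, chat_history_state, max_new_tokens, temperature, top_p) and the error message near old line 746, the deleted definition plausibly opened like this sketch (parameter order and defaults are guesses):

    def chat_with_bot(user_input, chat_history_state,
                      max_new_tokens=512, temperature=0.7, top_p=0.95):
        """Generator: runs the tool-use loop and yields updated chat history."""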
-# --- Chat
-
-
-
-
-
-
-
-
-
-    # Basic Input Safety Check (Example)
-    if any(phrase in user_input.lower() for phrase in ["harmful content", "malicious intent"]):
-        safe_response = "I cannot process requests that involve harmful or inappropriate content."
-        yield chat_history_state + [[user_input, safe_response]]  # Yield immediately for display
-        return  # Exit the generator
-
-    # Append user message to history immediately for display
-    # The bot message will be updated iteratively
-    # We append a placeholder now, and update it with the final response later.
-    # Ensure the initial yield is a list of lists for Gradio, even if it's just one turn
-    initial_history = chat_history_state + [[user_input, "..."]]
-    yield initial_history  # Yield state with placeholder
-
-    original_user_input = user_input
-    print(f"\n--- Starting turn with input: {user_input} ---")  # Debug Print
-
-    # Get current date
-    current_date = date.today().strftime('%Y-%m-%d')
-    print(f"Current Date: {current_date}")  # Debug Print
-
-    # Initialize unique_urls here to avoid UnboundLocalError
-    unique_urls = set()  # Collect URLs for Sources section
-
-    # Maintain an internal model history that strictly alternates user/assistant roles
-    # This history will be used directly by apply_chat_template.
-    # It represents the conversation *as the model sees it*, including tool calls/results.
-    # Build this history from the *completed* past turns from chat_history_state.
-    model_chat_history = []
-
-    # Convert Gradio chat history (list of lists) to model history (list of dicts)
-    # Ensure strict alternation: user, assistant, user, assistant...
-    # Only add complete turns from the *past* history (exclude the current incomplete turn)
-    # Limit the history length
-    history_to_process = chat_history_state  # Use the full history passed to the function initially
-
-    # Ensure we only take pairs [user, bot] from past history where bot is NOT the initial placeholder
-    # This guarantees that the last message in `recent_complete_turns` corresponds to a *completed* assistant response.
-    complete_past_turns = [
-        turn for turn in history_to_process
-        if turn is not None and len(turn) == 2 and turn[0] is not None and turn[1] is not None and str(turn[1]).strip() != "..."
-    ]
-
-    # Take the last MAX_HISTORY_TURNS complete turns
-    recent_complete_turns = complete_past_turns[max(0, len(complete_past_turns) - MAX_HISTORY_TURNS):]
-
-    for user_msg, bot_msg in recent_complete_turns:
-        # Add user message (must be present)
-        if user_msg is not None:  # Should always be True based on complete_past_turns filter
-            model_chat_history.append({"role": "user", "content": str(user_msg).strip()})
-        # Add assistant message (must be present and non-placeholder based on complete_past_turns filter)
-        if bot_msg is not None and str(bot_msg).strip() != "...":  # Should always be True based on filter
-            model_chat_history.append({"role": "assistant", "content": str(bot_msg).strip()})
-
-    # Initialize variables for the tool-use loop
-    max_tool_iterations = 5  # Limit the number of tool calls in a single turn to prevent infinite loops
-    final_response_text = None  # Variable to hold the final answer from the 'answer' tool
-    current_tool_results_text = ""  # Accumulate tool results text for the *next* model call in this turn
-
-    print("Starting tool execution loop...")
-
-    try:  # This is the main try block for the chat_with_bot function
-        for i in range(max_tool_iterations):
-            print(f"\n--- Tool Iteration {i+1} ---")
-
-            # Step 1 & 2: Prepare the user message content for THIS iteration and append to history
-            # The content of the user message for this iteration depends on whether it's the first step
-            # (original query + system prompt) or a subsequent step (tool results).
-            current_user_message_content = ""
-            if i == 0:
-                # First iteration: Include the system template and the original user input
-                system_prompt_content = tool_use_system_template.format(
-                    current_date=current_date,
-                    tool_definitions=TOOL_DEFINITIONS
-                )
-                current_user_message_content = system_prompt_content + "\n\nUser Query: " + original_user_input
-            else:
-                # Subsequent iterations: Include the tool results from the previous assistant response.
-                if current_tool_results_text:
-                    current_user_message_content = "<tool_results>\n" + current_tool_results_text.strip() + "\n</tool_results>"
-                    current_tool_results_text = ""  # Clear the buffer after adding to the prompt
-                else:
-                    # If no new tool results were accumulated in the previous step (e.g., parsing failed, no tools called),
-                    # send a message indicating this so the model doesn't wait indefinitely.
-                    current_user_message_content = "<tool_results>No new results or no tools were called in the previous turn.</tool_results>"
-                    print("No new tool results to add for this iteration.")
-
-            # Append the user message for the current iteration to the main model history.
-            # This history is what apply_chat_template will process.
-            # If the logic is correct, model_chat_history should always end with an 'assistant' role
-            # before this append, except for the very first turn of the conversation.
-            model_chat_history.append({"role": "user", "content": current_user_message_content.strip()})
-
-
-            # Step 3 & 4: Apply template to get the full prompt and Generate model output
-            # The history `model_chat_history` should now be in the correct state for generation:
-            # starting with 'user' and ending with the current 'user' message.
-            # The check below verifies the strict alternation before tokenization.
-            if len(model_chat_history) > 1 and model_chat_history[-1]['role'] == model_chat_history[-2]['role']:
-                print("Error: History roles are not alternating before generation!")
-                print("History:", model_chat_history)
-                final_response_text = "Sorry, I encountered an internal error with the conversation history format before generation."
-                break  # Break the tool loop if history is malformed
-
-
-            prompt_for_generation = client.chat_completion(
-                messages=model_chat_history,  # Use the main model_chat_history directly
-                max_tokens=max_new_tokens,  # Use max_new_tokens parameter
-                stream=False,  # Use non-streaming for tool parsing loop
-                temperature=temperature,  # Use temperature parameter
-                top_p=top_p,  # Use top_p parameter
-                # Add tool_choice="auto" or specific tool names if the API supports it
-                # For Gemma-2-IT, we rely on prompt engineering for tool calls.
-            )
-
-            raw_model_output = ""
-            if prompt_for_generation and prompt_for_generation.choices:
-                raw_model_output = prompt_for_generation.choices[0].message.content.strip()
-            else:
-                print("Model returned an empty response or no choices.")
-                raw_model_output = "<system_error>Error: Model returned empty response.</system_error>"  # Report error via system tag
-
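For readers unfamiliar with the call above: InferenceClient.chat_completion from huggingface_hub takes OpenAI-style role/content messages and returns an object whose choices carry the generated message. A standalone sketch with the model name used elsewhere in this file (the prompt is invented):

    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient("google/gemma-2-9b-it", token=os.getenv("HF_TOKEN"))
    response = client.chat_completion(
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=64,
        stream=False,
        temperature=0.7,
        top_p=0.95,
    )
    print(response.choices[0].message.content)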
-
-            print(f"Raw model output: {raw_model_output}")
-
-            # Step 5: Append the model's raw output as the assistant message for THIS iteration
-            # This is crucial for maintaining the alternation in `model_chat_history`
-            model_chat_history.append({"role": "assistant", "content": raw_model_output.strip()})
-
-
-            # Step 6: Parse Tool Calls from the latest assistant message (which is now the last entry in history)
-            tool_calls = []
-            # Use regex to find all content within <tool_code> tags in the latest assistant message
-            matches = re.findall(r'<tool_code>(.*?)</tool_code>', model_chat_history[-1]['content'], re.DOTALL)
-
-            if not matches:
-                print("No tool calls found in latest model output.")
-                # If no tool calls, check if the model tried to output an answer directly
-                # This is a fallback if the model fails to use the 'answer' tool.
-                # Apply cleanup patterns just to the latest assistant message to see if it's a potential answer
-                cleaned_potential_answer = re.sub(r'<tool_code>.*?</tool_code>', '', model_chat_history[-1]['content'], flags=re.DOTALL)  # Remove tool tags first
-                cleaned_potential_answer = re.sub(r'<.*?>', '', cleaned_potential_answer).strip()  # Remove any other potential tags
-
-                # If the cleaned output is not empty or just whitespace, treat it as a potential final answer
-                if cleaned_potential_answer and final_response_text is None:
-                    print("Model output does not contain tool calls, treating cleaned output as potential direct answer.")
-                    final_response_text = cleaned_potential_answer
-                    break  # Exit the tool loop as we have a response
-
-                # If no tool calls and not a potential answer, check for explicit system errors reported by the model
-                if "<system_error>" in model_chat_history[-1]['content'] or "<error>" in model_chat_history[-1]['content']:
-                    print("Model output contains system error tags. Exiting tool loop.")
-                    # The synthesis step will pick up these errors from the history
-                    break  # Exit loop on critical error reported by the model
-
-
-                # If no tool calls and not a potential answer, and no explicit error, the loop might continue.
-                # The next iteration's user message content will be generated as "No new results..."
-                continue  # Skip to the next iteration
-
-
-            # Step 7: Execute Tool Calls and accumulate results for the *next* iteration's user message
-            # We clear the buffer here, as we are processing the *latest* assistant message's tools.
-            current_tool_results_text = ""
-            answer_tool_called_in_this_iter = False  # Reset flag for this iteration's output
-
-            for match in matches:
-                try:
-                    # Attempt to parse the content within the tags as JSON
-                    tool_call_json = json.loads(match.strip())
-                    if "tool_name" in tool_call_json and "parameters" in tool_call_json:
-                        tool_name = tool_call_json.get("tool_name")
-                        parameters = tool_call_json.get("parameters", {})
-
-                        if tool_name == "answer":
-                            final_response_text = parameters.get("text", "")
-                            answer_tool_called_in_this_iter = True
-                            print(f"Model called 'answer' tool. Final response intended: '{final_response_text}'")
-                            # Once the 'answer' tool is called, we prioritize exiting the loop after this iteration.
-                            # We still process any other tool calls in this *same* model output, but then break the loop afterwards.
-                            continue  # Process next tool call in the same output (from the same model output)
-
-
-                        elif tool_name == "search":
-                            query = parameters.get("query")
-                            max_results = parameters.get("max_results", 5)
-                            if query:
-                                print(f"Executing Tool: search with query='{query}', max_results={max_results}")
-                                results = perform_duckduckgo_search(query, max_results)
-                                current_tool_results_text += f"<search_results_for_query query='{query}'>\n"
-                                if results:
-                                    for r in results:
-                                        snippet = r.get('body', 'N/A')
-                                        if len(snippet) > 300:
-                                            snippet = snippet[:300] + "..."
-                                        current_tool_results_text += f"<item>\n<title>{r.get('title', 'N/A')}</title>\n<snippet>{snippet}</snippet>\n<url>{r.get('href', 'N/A')}</url>\n</item>\n"
-                                    print(f"Executed search for '{query}'. Found {len(results)} results.")
-                                else:
-                                    current_tool_results_text += "No results found.\n"
-                                    print(f"No search results found for '{query}'.")
-                                current_tool_results_text += "</search_results_for_query>\n"
-
-                            else:
-                                current_tool_results_text += f"<search_results_for_query query='{query}'><error>Missing 'query' parameter.</error></search_results_for_query>\n"
-                                print(f"Skipping search tool call: Missing 'query' parameter.")
-
-
-                        elif tool_name == "lookup_business_info":
-                            query = parameters.get("query")
-                            # Use the threshold and max_matches provided by the model, or the defaults
-                            threshold = parameters.get("threshold", 0.50)
-                            max_matches = parameters.get("max_matches", 5)  # Use max_matches parameter
-                            if query:
-                                print(f"Executing Tool: lookup_business_info with query='{query}', threshold={threshold:.4f}, max_matches={max_matches}")
-                                # retrieve_business_info now returns a LIST of matches and the best score
-                                matches_list, best_score = retrieve_business_info(query, threshold=threshold, max_matches=max_matches)
-                                # MODIFIED: Format results block to contain MULTIPLE match tags
-                                current_tool_results_text += f"<lookup_business_info_results_for_query query='{query}' requested_threshold='{threshold:.4f}' requested_max_matches='{max_matches}' final_best_score='{best_score:.4f}'>\n"
-                                if matches_list:  # Check if the list is not empty
-                                    for match in matches_list:  # Iterate through the list of matches
-                                        if isinstance(match, dict):  # Ensure it's a dictionary
-                                            current_tool_results_text += f"<match>\n"
-                                            current_tool_results_text += f"<service>{match.get('Service', 'N/A')}</service>\n"
-                                            current_tool_results_text += f"<description>{match.get('Description', 'N/A')}</description>\n"
-                                            current_tool_results_text += f"<price>{match.get('Price', 'N/A')}</price>\n"
-                                            current_tool_results_text += f"<available>{match.get('Available', 'N/A')}</available>\n"
-                                            # Add other relevant fields from your sheet here if needed for synthesis
-                                            # e.g., <contact_person> etc.
-                                            current_tool_results_text += f"</match>\n"
-                                        # Optionally add a note if any item in the list was not a dict
-                                        else:
-                                            print(f"Warning: Item in retrieved_business_info list was not a dict: {match}")
-
-
-                                    print(f"Executed business lookup for '{query}'. Found {len(matches_list)} matches above threshold {threshold:.4f}. Best score: {best_score:.4f}.")
-                                else:
-                                    # This case covers No matches found above threshold within retrieve_business_info
-                                    current_tool_results_text += f"No relevant matches found above threshold {threshold:.4f} (best score: {best_score:.4f}).\n"
-                                    print(f"Executed business lookup for '{query}'. No matches found above threshold.")
-                                    # Add a note about the best score being below threshold
-                                    if best_score > 0:  # Only add note if *some* match was found, but not above threshold
-                                        current_tool_results_text += f"<system_note>Best match score ({best_score:.4f}) was below the requested threshold ({threshold:.4f}).</system_note>\n"
-
-                                current_tool_results_text += "</lookup_business_info_results_for_query>\n"
-                            else:
-                                current_tool_results_text += f"<lookup_business_info_results_for_query query='{query}'><error>Missing 'query' parameter.</error></lookup_business_info_results_for_query>\n"
-                                print(f"Skipping business lookup tool call: Missing 'query' parameter.")
-
-
-                        elif tool_name == "perform_date_calculation":
-                            query = parameters.get("query")
-                            if query:
-                                print(f"Executing Tool: perform_date_calculation with query='{query}'")
-                                result = perform_date_calculation(query)  # This function already returns a dict or error
-                                current_tool_results_text += f"<perform_date_calculation_results_for_query query='{query}'>\n"
-                                if result and result.get('success'):  # Check the 'success' key
-                                    current_tool_results_text += f"<description>{result.get('description', 'Calculation Successful')}</description>\n<date>{result.get('result')}</date>\n"
-                                    print(f"Executed date calculation for '{query}'. Result: {result.get('result')}.")
-                                elif result and result.get('description'):
-                                    current_tool_results_text += f"<description>{result.get('description')}</description>\n"  # Report description if result is None or not success
-                                    print(f"Executed date calculation for '{query}'. Failed: {result.get('description')}.")
-                                elif isinstance(result, str) and result.startswith("Error"):
-                                    current_tool_results_text += f"<error>{result}</error>\n"  # Report error string
-                                    print(f"Executed date calculation for '{query}'. Error: {result}.")
-                                else:  # Generic failure case
-                                    current_tool_results_text += "Calculation failed or no specific date recognized.\n"
-                                    print(f"Executed date calculation for '{query}'. No specific result.")
-                                current_tool_results_text += "</perform_date_calculation_results_for_query>\n"
-                            else:
-                                current_tool_results_text += f"<perform_date_calculation_results_for_query query='{query}'><error>Missing 'query' parameter.</error></lookup_business_info_results_for_query>\n"
-                                print(f"Skipping date calculation tool call: Missing 'query' parameter.")
-
-
-                        else:
-                            print(f"Unknown tool requested by model: {tool_name}")
-                            # Add a note to results buffer about the unknown tool
-                            current_tool_results_text += f"<system_note>Unknown tool requested: {tool_name}</system_note>\n"
-
-                    else:
-                        print(f"Parsed JSON missing 'tool_name' or 'parameters': {tool_call_json}")
-                        current_tool_results_text += f"<system_note>Failed to parse tool call: Missing 'tool_name' or 'parameters' in JSON: {match.strip()}</system_note>\n"
-                except json.JSONDecodeError as e:
-                    print(f"Failed to parse tool call JSON: {e}")
-                    print(f"Content was: {match.strip()}")
-                    current_tool_results_text += f"<system_note>Failed to parse tool call JSON: {e}. Content: {match.strip()}</system_note>\n"
-                except Exception as e:
-                    print(f"An unexpected error occurred during tool execution for call '{tool_call_json}': {e}")  # Changed tool_call_json_str to tool_call_json
-                    print(traceback.format_exc())  # Print traceback for tool execution errors
-                    current_tool_results_text += f"<system_note>An unexpected error occurred during tool call processing: {e}. Content: {match.strip()}</system_note>\n"
-
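A compact standalone view of the parse step the loop above implements: pull each <tool_code> block out of the raw model output with a regex, then decode it as JSON (the sample output string is invented):

    import json
    import re

    raw_model_output = '<tool_code>{"tool_name": "answer", "parameters": {"text": "Hi!"}}</tool_code>'
    for block in re.findall(r"<tool_code>(.*?)</tool_code>", raw_model_output, re.DOTALL):
        call = json.loads(block.strip())
        print(call["tool_name"], call["parameters"])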
-            # Step 8: Check if the 'answer' tool was called in this iteration
-            if answer_tool_called_in_this_iter:
-                print("Answer tool called. Exiting tool loop.")
-                break  # Exit the main tool iteration loop
-
-            # Step 9: If max iterations reached and 'answer' tool wasn't called
-            if i == max_tool_iterations - 1 and final_response_text is None:
-                print(f"Max tool iterations reached ({max_tool_iterations}) without 'answer' call.")
-                # Add a final note to the results buffer so the model sees it in the last forced synthesis step
-                current_tool_results_text += "<system_note>Maximum tool calls reached. Please provide a final answer based on the information gathered so far or state that the request cannot be fully fulfilled.</system_note>\n"
-                # Fall through to the final response generation step below
-
-
-    # --- End of the main try block for chat_with_bot ---
-    # THIS EXCEPT BLOCK NEEDS TO BE AT THE SAME INDENTATION LEVEL AS THE 'try' ABOVE
-    except Exception as e:  # This except matches the 'try' block at the beginning of the function
-        print(f"An unexpected error occurred in the chat_with_bot function: {e}")
-        print(traceback.format_exc())  # Print full traceback for debugging
-        final_response_text = f"Sorry, I encountered an unexpected error while processing your request: {e}"
-        # In case of error, ensure final_response_text is set so we proceed to update history
-
-
-    # The code below runs AFTER the tool iteration loop and its enclosing try/except finishes
-
-    # --- Final Response Generation (Synthesis) ---
-    # This step is either using the text from the 'answer' tool call,
-    # or generating a fallback response if the model failed to call 'answer'.
-
-    print("\n--- Final Response Generation ---")
-
-    # If the model successfully called the 'answer' tool, use that text.
-    # Otherwise, construct a synthesis prompt for the model to generate a final answer.
-    if final_response_text is None:
-        print("Model did not call 'answer' tool. Falling back to synthesis prompt.")
-        # Model failed to call the 'answer' tool within iterations or encountered an error.
-        # Fallback: Generate a response based on the accumulated history and tool results.
-
-        # The history `model_chat_history` now contains the full trace of tool calls
-        # and the user messages containing the tool results.
-
-        # Construct the synthesis prompt content.
-        # MODIFIED Synthesis Prompt to emphasize comprehensive answer
-        synthesis_prompt_content = """<system>
-Please provide a final, comprehensive answer to the user's original query based on ALL the information gathered from the executed tools and the conversation history. Synthesize the information into a coherent, natural language response. Pay special attention to providing detailed descriptions and listing all relevant points found from the business lookup tool when multiple items were retrieved.
-
-User's original query: "{original_user_input}"
-
-Information gathered from tools and process notes:
-{gathered_info_summary}
-
-Synthesize ALL relevant information into a clear, concise, and **comprehensive** natural language response for the user. When presenting information from multiple business lookup results, structure your answer to clearly describe each item found (e.g., list them, describe each one fully).
-
-**Guidelines for your response:**
-- Address the user's original question directly.
-- Use the information provided in the 'Information gathered' section, synthesizing details from all relevant results.
-- If the business lookup returned multiple matches, present the information for *each* match found clearly and informatively.
-- If a tool was executed but returned no relevant results (especially if the best score was below the threshold), or if there were errors (<system_error>, <error>, <system_note> tags), explain this gracefully to the user.
-- Maintain a helpful, polite, and professional business tone, reflecting the Futuresony brand and your identity as FutureAi.
-- Do NOT include raw tool call or result tags in your final answer.
-- If you were unable to gather necessary information, clearly state what you could and could not find.
-
-After your answer, generate 2-3 concise follow-up questions that might be helpful or relevant to the user based on the conversation and your response. List these questions clearly at the end.
-If Search Results were used, list the relevant URLs under a "Sources:" heading at the very end.
-</system>
-"""
-
-        # Summarize the gathered information by processing the model_chat_history
-        gathered_info_summary = ""
-        # unique_urls = set()  # Moved initialization outside the if block  # Commented out as initialization is now at the beginning of the function
-
-        # Iterate through the model history to find user messages that followed an assistant message
-        # These 'user' messages should contain the tool results block if tools were run.
-        # We iterate up to the second-to-last message, as the *very* last message in history
-        # will be the synthesis prompt itself, which hasn't been processed yet.
-        for i in range(1, len(model_chat_history)):
-            # Look for 'user' messages that follow an 'assistant' message
-            if model_chat_history[i]['role'] == 'user' and isinstance(model_chat_history[i]['content'], str) and '<tool_results>' in model_chat_history[i]['content']:
-                msg_content = model_chat_history[i]['content']
-                # Check if it contains the tool results block
-                tool_results_block = re.search(r'<tool_results>(.*?)</tool_results>', msg_content, re.DOTALL)
-                if tool_results_block:
-                    content = tool_results_block.group(1)  # Content inside <tool_results>
-
-                    # --- Extract and format info from tool result blocks ---
-                    search_blocks = re.findall(r'<search_results_for_query.*?>(.*?)</search_results_for_query>', content, re.DOTALL)
-                    for sr_content in search_blocks:
-                        query_match = re.search(r"query='(.*?)'", sr_content)  # Extract query attribute
-                        query = query_match.group(1) if query_match else "Unknown"
-                        gathered_info_summary += f"Search results for '{query}':\n"
-                        items = re.findall(r'<item>(.*?)</item>', sr_content, re.DOTALL)
-                        if items:
-                            for item_content in items:
-                                title = re.search(r'<title>(.*?)</title>', item_content, re.DOTALL)
-                                snippet = re.search(r'<snippet>(.*?)</snippet>', item_content, re.DOTALL)
-                                url = re.search(r'<url>(.*?)</url>', item_content, re.DOTALL)
-                                title_text = title.group(1).strip() if title else 'N/A'
-                                snippet_text = snippet.group(1).strip() if snippet else 'N/A'
-                                url_text = url.group(1).strip() if url else 'N/A'
-                                gathered_info_summary += f"- Title: {title_text}, Snippet: {snippet_text}\n"
-                                if url_text and url_text != 'N/A':
-                                    unique_urls.add(url_text)  # Add URL to set
-
-                        elif "No results found" in sr_content:
-                            gathered_info_summary += "- No results found.\n"
-                        elif "<error>" in sr_content:
-                            error_text = re.search(r'<error>(.*?)</error>', sr_content, re.DOTALL)
-                            gathered_info_summary += f"- Error during search: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
-
-
-                    # Business lookup results (MODIFIED to handle MULTIPLE match tags)
-                    lookup_blocks = re.findall(r'<lookup_business_info_results_for_query.*?>(.*?)</lookup_business_info_results_for_query>', content, re.DOTALL)
-                    for lr_content in lookup_blocks:
-                        query_match = re.search(r"query='(.*?)'", lr_content)
-                        query = query_match.group(1) if query_match else "Unknown"
-                        # Extract requested_threshold, requested_max_matches, final_best_score
-                        req_thresh_match = re.search(r"requested_threshold='(.*?)'", lr_content)
-                        req_thresh = float(req_thresh_match.group(1)) if req_thresh_match else 0.50
-                        req_max_matches_match = re.search(r"requested_max_matches='(.*?)'", lr_content)
-                        req_max_matches = int(req_max_matches_match.group(1)) if req_max_matches_match else 5
-                        final_best_score_match = re.search(r"final_best_score='(.*?)'", lr_content)
-                        final_best_score = float(final_best_score_match.group(1)) if final_best_score_match else 0.0
-
-
-                        gathered_info_summary += f"Business lookup results for '{query}' (Requested Threshold: {req_thresh:.4f}, Requested Max Matches: {req_max_matches}, Final Best Score: {final_best_score:.4f}):\n"
-
-                        matches_found = re.findall(r'<match>(.*?)</match>', lr_content, re.DOTALL)  # Find ALL match tags
-                        if matches_found:
-                            gathered_info_summary += f"  Found {len(matches_found)} relevant item(s):\n"
-                            for match_content in matches_found:  # Iterate through each match
-                                service = re.search(r'<service>(.*?)</service>', match_content, re.DOTALL)
-                                description = re.search(r'<description>(.*?)</description>', match_content, re.DOTALL)
-                                price = re.search(r'<price>(.*?)</price>', match_content, re.DOTALL)
-                                available = re.search(r'<available>(.*?)</available>', match_content, re.DOTALL)
-                                # Add extraction for other fields if you include them in your tool output
-                                # contact_person = re.search(r'<contact_person>(.*?)</contact_person>', match_content, re.DOTALL)
-
-                                gathered_info_summary += f"  - Service: {service.group(1).strip() if service else 'N/A'}\n"
-                                gathered_info_summary += f"    Description: {description.group(1).strip() if description else 'N/A'}\n"
-                                gathered_info_summary += f"    Price: {price.group(1).strip() if price else 'N/A'}\n"
-                                gathered_info_summary += f"    Available: {available.group(1).strip() if available else 'N/A'}\n"
-                                # Add other fields here...
-                                # if contact_person: gathered_info_summary += f"    Contact Person: {contact_person.group(1).strip()}\n"
-
-                        elif "No relevant matches found" in lr_content:
-                            score_match = re.search(r"final_best_score='(.*?)'", lr_content)  # Look for final_best_score
-                            score = float(score_match.group(1)) if score_match else 0.0
-                            threshold_match = re.search(r"requested_threshold='(.*?)'", lr_content)
-                            threshold_val = float(threshold_match.group(1)) if threshold_match else 0.50
-
-                            gathered_info_summary += f"  No relevant matches found above threshold {threshold_val:.4f} (best score: {score:.4f}).\n"
-                        elif "<error>" in lr_content:
-                            error_text = re.search(r'<error>(.*?)</error>', lr_content, re.DOTALL)
-                            gathered_info_summary += f"  Error during business lookup: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
-
-                        # Include system notes found within the business lookup results block
-                        system_notes_in_lookup = re.findall(r'<system_note>(.*?)</system_note>', lr_content, re.DOTALL)
-                        for note in system_notes_in_lookup:
-                            gathered_info_summary += f"  System Note within Lookup: {note.strip()}\n"
-
-
-                    # Date calculation results
-                    date_blocks = re.findall(r'<perform_date_calculation_results_for_query.*?>(.*?)</perform_date_calculation_results_for_query>', content, re.DOTALL)
-                    for dr_content in date_blocks:
-                        query_match = re.search(r"query='(.*?)'", dr_content)
-                        query = query_match.group(1) if query_match else "Unknown"
-                        gathered_info_summary += f"Date calculation results for '{query}':\n"
-                        date_val = re.search(r'<date>(.*?)</date>', dr_content, re.DOTALL)
-                        desc = re.search(r'<description>(.*?)</description>', dr_content, re.DOTALL)
-                        if date_val:
-                            gathered_info_summary += f"- Result: {date_val.group(1).strip()}\n"
-                            if desc: gathered_info_summary += f"  Description: {desc.group(1).strip()}\n"
-                        elif desc:
-                            gathered_info_summary += f"- {desc.group(1).strip()}\n"
-                        elif "<error>" in dr_content:
-                            error_text = re.search(r'<error>(.*?)</error>', dr_content, re.DOTALL)
-                            gathered_info_summary += f"- Error during date calculation: {error_text.group(1).strip() if error_text else 'Unknown error'}\n"
-                        else:
-                            gathered_info_summary += "- No specific date result found.\n"
-
-
-                    # System Notes/Errors from Tool Execution (outside of specific tool blocks but within <tool_results>)
-                    system_notes_in_results_block = re.findall(r'<system_note>(.*?)</system_note>', content, re.DOTALL)
-                    for note in system_notes_in_results_block:
-                        # Add only if not already added from within a specific lookup block
-                        if f"System Note: {note.strip()}\n" not in gathered_info_summary and f"System Note within Lookup: {note.strip()}\n" not in gathered_info_summary:
-                            gathered_info_summary += f"System Note from Tool Results: {note.strip()}\n"
-
-                    system_errors_in_results_block = re.findall(r'<system_error>(.*?)</system_error>', content, re.DOTALL)
-                    for error_note in system_errors_in_results_block:
-                        gathered_info_summary += f"System Error from Tool Results: {error_note.strip()}\n"
-
-        # Also check the raw model output (last assistant message) for system errors if tool results block wasn't generated
-        last_assistant_message_content = model_chat_history[-1]['content'] if model_chat_history and model_chat_history[-1]['role'] == 'assistant' else ""
-        system_errors_in_raw_output = re.findall(r'<system_error>(.*?)</system_error>', last_assistant_message_content, re.DOTALL)
-        for error_note in system_errors_in_raw_output:
-            # Add only if not already captured from within tool results block
-            if f"System Error from Tool Results: {error_note.strip()}" not in gathered_info_summary:
-                gathered_info_summary += f"System Error in model output: {error_note.strip()}\n"
-
-        # Check for system notes/errors that might be outside <tool_results> but in the raw assistant output
-        system_notes_in_raw_output = re.findall(r'<system_note>(.*?)</system_note>', last_assistant_message_content, re.DOTALL)
-        for note in system_notes_in_raw_output:
-            if f"System Note from Tool Results: {note.strip()}" not in gathered_info_summary and f"Business Lookup Note: {note.strip()}\n" not in gathered_info_summary:  # Avoid duplicates
-                gathered_info_summary += f"System Note in model output: {note.strip()}\n"
-
-
-        if not gathered_info_summary.strip():
-            gathered_info_summary = "No specific information was gathered using tools."
-
-        # Add the synthesis prompt to the history for the final generation step
-        # This keeps the history structure correct for apply_chat_template
-        # IMPORTANT: This adds the synthesis prompt as the final USER message.
-        # The model will then generate the final ASSISTANT response.
-        temp_chat_history_for_synthesis = model_chat_history.copy()  # Copy the history including tool results
-
-        synthesis_prompt_formatted = synthesis_prompt_content.format(
-            original_user_input=original_user_input,
-            gathered_info_summary=gathered_info_summary.strip()  # Add the summary of results
-        )
-
-        # Append the synthesis prompt as the final user message content
-        # This maintains the user/assistant alternation (last was assistant, now user for synthesis instruction)
-        temp_chat_history_for_synthesis.append({"role": "user", "content": synthesis_prompt_formatted.strip()})
-
-        # Generate the final response using the history with the synthesis prompt
-        print("Generating final synthesized response...")
-        try:
-            final_synthesis_response = client.chat_completion(
-                messages=temp_chat_history_for_synthesis,  # Use the history with the synthesis prompt
-                max_tokens=1024,  # Allow reasonable tokens for synthesis
-                stream=False,
-                temperature=0.5,  # Allow a bit more creativity for synthesis
-                top_p=0.95,
-            )
-
-            if final_synthesis_response and final_synthesis_response.choices:
-                final_response_text = final_synthesis_response.choices[0].message.content.strip()
-                print(f"Synthesized response generated: {final_response_text[:100]}...")
-            else:
-                final_response_text = "I was unable to generate a comprehensive answer based on the information gathered."
-                print("Synthesis generation failed.")
-        except Exception as e:
-            print(f"Error during synthesis generation: {e}")
-            print(traceback.format_exc())
-            final_response_text = f"An error occurred while synthesizing the response: {e}"
-
-
-    # Add Sources if any URLs were collected
-    if unique_urls:
-        final_response_text += "\n\nSources:\n" + "\n".join(sorted(list(unique_urls)))  # Sort URLs alphabetically
-
-
-    # Update the last message in chat_history_state with the final response
-    # Find the last turn in the original chat_history_state (which was already updated with the placeholder)
-    if chat_history_state and len(chat_history_state) > 0:
-        chat_history_state[-1][1] = final_response_text  # Update the bot's message in the last turn
-
-    # Remove the [[TEXT]] marker from the chat_history_state *before* yielding
-    # Iterate through the history and clean each item
-    cleaned_chat_history_list_of_lists = []
-    for user_msg, bot_msg in chat_history_state:
-        cleaned_user_msg = user_msg.replace('[[TEXT]] ', '') if isinstance(user_msg, str) else user_msg
-        cleaned_bot_msg = bot_msg.replace('[[TEXT]] ', '') if isinstance(bot_msg, str) else bot_msg
-        cleaned_chat_history_list_of_lists.append([cleaned_user_msg, cleaned_bot_msg])
-
-    # Convert the cleaned list of lists to a list of tuples as required by Gradio
-    cleaned_chat_history_list_of_tuples = [(user_msg, bot_msg) for user_msg, bot_msg in cleaned_chat_history_list_of_lists]
-
-
-    print(f"\n--- Final Response: {final_response_text[:100]}... ---")  # Debug Print
-
-    # Yield the cleaned history as a list of tuples
-    yield cleaned_chat_history_list_of_tuples  # Yield the cleaned history as tuples
 
 
-#
-
-
 
-# The Gradio interface definition remains the same as it correctly
-# uses the updated respond function.
 
print(f"RAG functionality available: {business_info_available}")
|
1013 |
|
1014 |
demo = gr.ChatInterface(
|
1015 |
-
fn=
|
1016 |
additional_inputs=[
|
1017 |
-
gr.Textbox(value=
|
1018 |
-
gr.Slider(1,
|
1019 |
-
gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature
|
1020 |
gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p (nucleus sampling)"),
|
1021 |
],
|
1022 |
-
|
1023 |
-
|
1024 |
-
description="Chat with Google Gemma‑2‑9B‑IT via Hugging Face Inference API, with business info retrieved from Google Sheets and external search capabilities.", # Updated description
|
1025 |
-
submit_btn="Send", # Renamed submit button
|
1026 |
-
# Removed clear_btn as it caused a TypeError in the user's environment
|
1027 |
)
|
1028 |
|
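Several arguments of this gr.ChatInterface call were truncated in extraction (fn=, a gr.Textbox default, two slider definitions). A hypothetical reconstruction consistent with the parameters the chat function consumes; the values marked as guessed are ours, and only the Top-p slider, description, and submit_btn survive verbatim above:

    demo = gr.ChatInterface(
        fn=chat_with_bot,  # hypothetical: the tool-use chat generator defined earlier
        additional_inputs=[
            gr.Textbox(value="You are FutureAi.", label="System message"),  # guessed default
            gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),  # guessed range
            gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        ],
        description="Chat with Google Gemma-2-9B-IT via Hugging Face Inference API, "
                    "with business info retrieved from Google Sheets and external search capabilities.",
        submit_btn="Send",
    )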
-# Enable request queueing (concurrency handled automatically on Gradio ≥
 demo.queue()
 
 if __name__ == "__main__":
     # Authenticate and load data before launching the demo
     if authenticate_google_sheets():
         load_business_info()
     else:
         print("Google Sheets authentication failed. RAG functionality will not be available.")
 
-    # The print statement for RAG status is added here, before launching the demo.
     print(f"RAG functionality available: {business_info_available}")
 
-
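The final removed line is blank in the extraction; a Spaces app of this shape ordinarily ends by launching the queued demo, presumably:

    # At the end of the __main__ block:
    demo.launch()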
1 |
+
# This script combines all components for deployment on Hugging Face Spaces.
|
|
|
2 |
|
3 |
+
# --- Imports ---
|
4 |
import os
|
5 |
import gradio as gr
|
6 |
from huggingface_hub import InferenceClient
|
|
|
12 |
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
|
13 |
from sentence_transformers import SentenceTransformer, util, CrossEncoder
|
14 |
import gspread
|
15 |
+
# from google.colab import auth # Colab specific, remove for HF Spaces
|
16 |
+
# from google.auth import default # Colab specific, remove for HF Spaces
|
17 |
from tqdm import tqdm
|
18 |
from duckduckgo_search import DDGS
|
19 |
import spacy
|
20 |
from datetime import date, timedelta
|
21 |
+
from dateutil.relativedelta import relativedelta
|
22 |
+
import traceback
|
23 |
+
import base64
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
# Suppress warnings
|
26 |
warnings.filterwarnings("ignore", category=UserWarning)
|
27 |
|
28 |
+
# --- Global Variables and Secrets ---
|
29 |
+
# HF_TOKEN is automatically available in HF Spaces secrets
|
30 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
31 |
+
# GOOGLE_BASE64_CREDENTIALS should be added as a Space Secret
|
32 |
+
SHEET_ID = os.getenv("SHEET_ID") # Get SHEET_ID from Space Secrets
|
33 |
GOOGLE_BASE64_CREDENTIALS = os.getenv("GOOGLE_BASE64_CREDENTIALS")
|
34 |
|
35 |
+
# --- Model and Tool Initialization ---
|
36 |
+
client = None # Initialize after HF_TOKEN is confirmed available
|
|
|
|
|
37 |
nlp = None
|
38 |
+
embedder = None
|
39 |
+
reranker = None
|
40 |
+
|
41 |
try:
|
42 |
+
# Initialize InferenceClient
|
43 |
+
if HF_TOKEN:
|
44 |
+
client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
|
45 |
+
print("Hugging Face Inference Client initialized.")
|
46 |
+
else:
|
47 |
+
print("Warning: HF_TOKEN not found. Inference Client not initialized.")
|
48 |
+
|
49 |
+
# Load spacy model for sentence splitting
|
50 |
try:
|
|
|
51 |
nlp = spacy.load("en_core_web_sm")
|
52 |
+
print("SpaCy model 'en_core_web_sm' loaded.")
|
53 |
+
except OSError:
|
54 |
+
print("SpaCy model 'en_core_web_sm' not found. Downloading...")
|
55 |
+
try:
|
56 |
+
# Use pip for installation in HF Spaces environment
|
57 |
+
os.system("pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz")
|
58 |
+
nlp = spacy.load("en_core_web_sm")
|
59 |
+
print("SpaCy model 'en_core_web_sm' downloaded and loaded.")
|
60 |
+
except Exception as e:
|
61 |
+
print(f"Failed to download or load SpaCy model: {e}")
|
62 |
|
63 |
+
# Load SentenceTransformer for RAG/business info retrieval
|
|
|
|
|
64 |
print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
|
65 |
embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
|
66 |
print("Sentence Transformer loaded.")
|
|
|
|
|
|
|
67 |
|
68 |
+
# Load a Cross-Encoder model for re-ranking retrieved documents
|
|
|
|
|
69 |
print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
|
70 |
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
|
71 |
print("Cross-Encoder Reranker loaded.")
|
72 |
+
|
73 |
except Exception as e:
|
74 |
+
print(f"An error occurred during model/tool initialization: {e}")
|
|
|
75 |
print(traceback.format_exc())
|
|
|
76 |
|
77 |
|
# --- Google Sheets Authentication ---
gc = None  # Global variable for gspread client

def authenticate_google_sheets():
    """Authenticates with Google Sheets using base64 encoded credentials."""
    print("Authenticating Google Account...")
    if not GOOGLE_BASE64_CREDENTIALS:
        print("Error: GOOGLE_BASE64_CREDENTIALS secret not found.")
        print("Please add GOOGLE_BASE64_CREDENTIALS as a Space Secret.")
        return False

    try:
        ...  # (success path collapsed in this view)
    except Exception:  # (exact handler collapsed in this view)
        print(traceback.format_exc())
        return False
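# A minimal sketch of what the collapsed try-block implies -- assuming the
# secret is a base64-encoded service-account JSON and gspread's
# service_account_from_dict helper (the actual elided lines may differ):
#
#     creds_dict = json.loads(base64.b64decode(GOOGLE_BASE64_CREDENTIALS))
#     gc = gspread.service_account_from_dict(creds_dict)
#     return True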
# --- Google Sheets Data Loading and Embedding ---
data = []  # Global variable to store loaded data
descriptions_for_embedding = []
embeddings = torch.tensor([])
business_info_available = False  # Flag to indicate if business info was loaded successfully

def load_business_info():
    """Loads business information from Google Sheet and creates embeddings."""
    global data, descriptions_for_embedding, embeddings, business_info_available
    business_info_available = False  # Reset flag

    if gc is None:
        ...  # (collapsed in this view)

    if not SHEET_ID:
        print("Error: SHEET_ID not set.")
        print("Please add SHEET_ID as a Space Secret.")
        return

    try:
        # ... (worksheet fetch collapsed in this view)
        if not data_records:
            print(f"Warning: No data records found in Google Sheet with ID: {SHEET_ID}")
            data = []
            descriptions_for_embedding = []
        else:
            # Filter out rows missing 'Service' or 'Description'
            filtered_data = [row for row in data_records if row.get('Service') and row.get('Description')]
            if not filtered_data:
                print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
                data = []
                descriptions_for_embedding = []
            else:
                data = filtered_data
                descriptions_for_embedding = [f"Service: {row['Service']}. Description: {row['Description']}" for row in data]

        if descriptions_for_embedding and embedder is not None:
            print("Encoding descriptions...")
            try:
                # ... (encoding collapsed in this view)
                business_info_available = True  # Set flag if successful
            except Exception as e:
                print(f"Error during description encoding: {e}")
                embeddings = torch.tensor([])
                business_info_available = False  # Encoding failed
        else:
            print("Skipping encoding descriptions: No descriptions found or embedder not available.")
            embeddings = torch.tensor([])
            business_info_available = False

        print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
        if not business_info_available:
            ...  # (collapsed in this view)
    except gspread.exceptions.SpreadsheetNotFound:
        print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
        print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
        business_info_available = False
    except Exception as e:
        print(f"An error occurred while accessing the Google Sheet: {e}")
        print(traceback.format_exc())
        business_info_available = False
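# The collapsed steps above presumably fetch the rows and encode them; a
# hedged sketch using standard gspread / SentenceTransformer calls (assumed,
# not visible in this view):
#
#     worksheet = gc.open_by_key(SHEET_ID).sheet1
#     data_records = worksheet.get_all_records()
#     ...
#     embeddings = embedder.encode(descriptions_for_embedding, convert_to_tensor=True)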
# --- Business Info Retrieval (RAG) ---
def retrieve_business_info(query: str, top_n: int = 3) -> list:
    """
    Retrieves relevant business information from loaded data based on a query.
    """
    global data
    if not business_info_available or embedder is None or not descriptions_for_embedding or not data:
        print("Business information retrieval is not available or data is empty.")
        return []

    try:
        query_embedding = embedder.encode(query, convert_to_tensor=True)
        cosine_scores = util.cos_sim(query_embedding, embeddings)[0]
        top_results_indices = torch.topk(cosine_scores, k=min(top_n, len(data)))[1].tolist()
        top_results = [data[i] for i in top_results_indices]

        if reranker is not None and top_results:
            print("Re-ranking top results...")
            rerank_pairs = [(query, descriptions_for_embedding[i]) for i in top_results_indices]
            rerank_scores = reranker.predict(rerank_pairs)
            reranked_indices = sorted(range(len(rerank_scores)), key=lambda i: rerank_scores[i], reverse=True)
            reranked_results = [top_results[i] for i in reranked_indices]
            print("Re-ranking complete.")
            return reranked_results
        else:
            return top_results

    except Exception as e:
        print(f"Error during business information retrieval: {e}")
        print(traceback.format_exc())
        return []
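# Example usage (hypothetical query; assumes the 'Service' and 'Description'
# sheet columns used above):
#
#     for row in retrieve_business_info("Do you offer web design?", top_n=3):
#         print(row.get('Service'), '->', row.get('Description'))
#
# The bi-encoder keeps first-pass retrieval cheap; the cross-encoder re-scores
# only the top_n candidates -- the usual retrieve-then-rerank trade-off.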
# --- Tool Functions ---
# Function to perform DuckDuckGo Search and return results with URLs
def perform_duckduckgo_search(query: str, max_results: int = 5):
    """
    Performs a DuckDuckGo web search and returns a list of results with URLs.
    """
    print(f"Executing Tool: perform_duckduckgo_search with query='{query}'")
    search_results_list = []
    try:
        time.sleep(1)  # Sleep for 1 second to avoid hammering the endpoint
        with DDGS() as ddgs:
            if not query or len(query.split()) < 2:
                print(f"Skipping search for short query: '{query}'")
                return []
            results_generator = ddgs.text(query, max_results=max_results)
            results_found = False
            for r in results_generator:
                search_results_list.append(r)
                results_found = True
            if not results_found and max_results > 0:
                print(f"DuckDuckGo search for '{query}' returned no results.")
    except Exception as e:
        print(f"Error during DuckDuckGo search for '{query}': {e}")
        return []
    return search_results_list
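# Example call -- in current duckduckgo_search releases each result is a dict
# with 'title', 'href', and 'body' keys (verify against the installed version):
#
#     for hit in perform_duckduckgo_search("weather in Dar es Salaam", max_results=3):
#         print(hit.get('title'), hit.get('href'))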
# Function to perform date calculation if needed
def perform_date_calculation(query: str):  # (name recovered from the print below)
    """
    Analyzes the query for date calculation requests and performs the calculation.
    Returns a dict describing the calculation and result, or None.
    """
    print(f"Executing Tool: perform_date_calculation with query='{query}'")
    query_lower = query.lower()
    # ... (parsing and calculation branches collapsed in this view)
    return {"query": query, "description": desc, "result": None, "success": False}
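# One branch the collapsed body might implement, sketched with the date /
# relativedelta imports at the top of the file (hypothetical -- the real
# parsing logic is not shown in this view):
#
#     if "next month" in query_lower:
#         result = date.today() + relativedelta(months=1)
#         return {"query": query, "description": "Date one month from today",
#                 "result": result.isoformat(), "success": True}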
# --- Tool Definitions and System Prompt ---
TOOL_DEFINITIONS = """
Available tools:
1. **search**: Use this tool to perform a web search for current external information. Useful for facts, news, weather, etc.
...
"""

tool_use_system_template = """<system>
You are FutureAi, a helpful, polite, and professional assistant for Futuresony. Your primary goal is to assist the user by effectively using the available tools or answering directly based on the conversation history and tool outputs. Maintain a positive and helpful tone. If you are unsure or a tool returns no clear results, state this gracefully. When providing answers based on gathered information, aim for a comprehensive and detailed response, synthesizing all relevant points from the tool outputs.
...
"""

MAX_HISTORY_TURNS = 5  # Keep last 5 turns
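# Note: tool_use_system_template above is later filled via
# .format(current_date=..., tool_definitions=...) in the gr.Textbox below, so
# its collapsed body must contain {current_date} and {tool_definitions}
# placeholders.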
# --- Chat Handler ---
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    global client  # Ensure client is accessible
    if client is None:
        yield "Error: Hugging Face Inference Client not initialized. Please check your HF_TOKEN Space Secret."
        return

    # Retrieve relevant business information based on the user's message
    retrieved_info = retrieve_business_info(message)

    # Build the ChatML conversation
    messages = [{"role": "system", "content": system_message}]

    # Include retrieved information as context if available
    if retrieved_info:
        context_message = "Use the following business information to help answer the user's question if relevant:\n"
        for i, info in enumerate(retrieved_info):
            context_message += f"--- Business Info Entry {i+1} ---\n"
            for key, value in info.items():
                context_message += f"{key}: {str(value)}\n"
            context_message += "---\n"
        messages.append({"role": "user", "content": context_message})
        print("Added retrieved business info to messages.")

    # Add conversation history
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    # Add the current user message
    messages.append({"role": "user", "content": message})

    # Stream tokens
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response
    except Exception as e:
        print(f"Error during chat completion: {e}")
        print(traceback.format_exc())
        yield f"An error occurred: {e}"
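# Note: MAX_HISTORY_TURNS is defined above but respond() currently sends the
# full history. If trimming is intended, a one-line change would do it (an
# assumption about intent, not part of the original code):
#
#     for user_msg, bot_msg in history[-MAX_HISTORY_TURNS:]: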
# --- Gradio Interface ---
print(f"RAG functionality available: {business_info_available}")

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value=tool_use_system_template.format(current_date=date.today().strftime('%Y-%m-%d'), tool_definitions=TOOL_DEFINITIONS), label="System message", interactive=False),  # Use the template here
        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="Gemma-2-9B-IT Chat with RAG and Tools",
    description="Chat with Google Gemma-2-9B-IT via the Hugging Face Inference API, with business info retrieved from Google Sheets and external search/date tools.",
)

# Enable request queueing (concurrency is handled automatically on Gradio >= 4)
demo.queue()
if __name__ == "__main__":
    # Authenticate and load data before launching the demo;
    # these steps run when the script starts.
    if authenticate_google_sheets():
        load_business_info()
    else:
        print("Google Sheets authentication failed. RAG functionality will not be available.")

    print(f"RAG functionality available: {business_info_available}")

    # Launch the Gradio interface
    # (debug=True is useful for development; remove it for production)
    demo.launch()
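# To test locally (assumed workflow; on HF Spaces the secrets come from the
# Space settings instead):
#   HF_TOKEN=... SHEET_ID=... GOOGLE_BASE64_CREDENTIALS=... python app.py
# then open the local URL Gradio prints.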