Browse files
@@ -6,7 +6,7 @@ import json
6 |
import re
7 |
from uuid import uuid4
8 |
from datetime import datetime
9 |
from duckduckgo_search import
10 |
from sentence_transformers import SentenceTransformer, util
11 |
from typing import List, Dict, Any, Optional, Union, Tuple
12 |
import logging
@@ -24,7 +24,7 @@ if not HF_API_KEY:
24 |
raise ValueError("Please set the HF_API_KEY environment variable.")
25 |
26 |
# You can use different models for different tasks
27 |
MAIN_LLM_ENDPOINT = "your-main-llm-endpoint"
28 |
REASONING_LLM_ENDPOINT = "your-reasoning-llm-endpoint" # Can be the same as main if needed
29 |
CRITIC_LLM_ENDPOINT = "your-critic-llm-endpoint" # Can be the same as main if needed
30 |
@@ -48,7 +48,7 @@ except Exception as e:
48 |
def hf_inference(endpoint, inputs, parameters=None, retries=5):
49 |
headers = {"Authorization": f"Bearer {HF_API_KEY}"}
50 |
payload = {"inputs": inputs, "parameters": parameters or {}}
51 |
52 |
for attempt in range(retries):
53 |
54 |
response =, headers=headers, json=payload, timeout=TIMEOUT)
@@ -61,40 +61,41 @@ def hf_inference(endpoint, inputs, parameters=None, retries=5):
61 |
time.sleep(RETRY_DELAY * (1 + attempt)) # Exponential backoff
62 |
return {"error": "Request failed after multiple retries."}
63 |
64 |
def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str = "moderate",
65 |
time_filter: str = "", region: str = "wt-wt", language: str = "en-us") -> list:
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
except Exception as e:
74 |
logger.error(f"DuckDuckGo search error: {e}")
75 |
return []
76 |
77 |
def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
78 |
critique: str = "", focus_areas: list = []) -> str:
79 |
if not search_results:
80 |
return "No search results to reason about."
81 |
82 |
reasoning_input = "Reason about the following search results in relation to the prompt:\n\n"
83 |
reasoning_input += f"Prompt: {prompt}\n\n"
84 |
85 |
if focus_areas:
86 |
reasoning_input += f"Focus particularly on these aspects: {', '.join(focus_areas)}\n\n"
87 |
88 |
for i, result in enumerate(search_results):
89 |
reasoning_input += f"- Result {i + 1}: Title: {result['title']}, Snippet: {result['snippet']}\n"
90 |
91 |
if reasoning_context:
92 |
recent_context = reasoning_context[-MAX_HISTORY_ITEMS:]
93 |
reasoning_input += "\nPrevious Reasoning Context:\n" + "\n".join(recent_context)
94 |
95 |
if critique:
96 |
reasoning_input += f"\n\nRecent critique to address: {critique}\n"
97 |
98 |
reasoning_input += "\nProvide a thorough, nuanced analysis that builds upon previous reasoning if applicable. Consider multiple perspectives and potential contradictions in the search results."
99 |
100 |
reasoning_output = hf_inference(REASONING_LLM_ENDPOINT, reasoning_input)
@@ -108,15 +109,15 @@ def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
108 |
def tool_summarize(insights: list, prompt: str, contradictions: list = []) -> str:
109 |
if not insights:
110 |
return "No insights to summarize."
111 |
112 |
summarization_input = f"Synthesize the following insights into a cohesive and comprehensive summary regarding: '{prompt}'\n\n"
113 |
summarization_input += "\n\n".join(insights[-MAX_HISTORY_ITEMS:]) # Only use most recent insights
114 |
115 |
if contradictions:
116 |
summarization_input += "\n\nAddress these specific contradictions:\n" + "\n".join(contradictions)
117 |
118 |
summarization_input += "\n\nProvide a well-structured summary that:\n1. Presents the main findings\n2. Acknowledges limitations and uncertainties\n3. Highlights areas of consensus and disagreement\n4. Suggests potential directions for further inquiry"
119 |
120 |
summarization_output = hf_inference(MAIN_LLM_ENDPOINT, summarization_input)
121 |
122 |
if isinstance(summarization_output, dict) and "generated_text" in summarization_output:
@@ -125,120 +126,120 @@ def tool_summarize(insights: list, prompt: str, contradictions: list = []) -> st
125 |
logger.error(f"Failed to generate summary: {summarization_output}")
126 |
return "Could not generate a summary due to an error."
127 |
128 |
def tool_generate_search_query(prompt: str, previous_queries: Optional[list] = None,
                               failed_queries: Optional[list] = None,
                               focus_areas: Optional[list] = None) -> str:
    """Ask the main LLM to propose a web-search query for ``prompt``.

    Args:
        prompt: The research prompt the query should serve.
        previous_queries: Queries already issued. Only the last
            ``MAX_HISTORY_ITEMS`` entries are shown to the model.
        failed_queries: Queries presented to the model as not having
            yielded useful results.
        focus_areas: Aspects the model is asked to concentrate on.

    Returns:
        The stripped ``generated_text`` of the model response, or ``""``
        when the response is not a dict carrying that key (the failure
        is logged).
    """
    # The list defaults are None rather than [] so that no list object is
    # shared between calls.
    parts = [f"Generate an effective search query for the following prompt: {prompt}\n"]

    if previous_queries:
        recent_queries = previous_queries[-MAX_HISTORY_ITEMS:]
        parts.append("Previous search queries:\n" + "\n".join(recent_queries) + "\n")

    if failed_queries:
        parts.append("These queries didn't yield useful results:\n" + "\n".join(failed_queries) + "\n")

    if focus_areas:
        parts.append(f"Focus particularly on these aspects: {', '.join(focus_areas)}\n")

    parts.append("Refine the search query based on previous queries, aiming for more precise results.\n")
    parts.append("Search Query:")
    query_gen_input = "".join(parts)

    query_gen_output = hf_inference(MAIN_LLM_ENDPOINT, query_gen_input)

    if isinstance(query_gen_output, dict) and 'generated_text' in query_gen_output:
        return query_gen_output['generated_text'].strip()

    logger.error(f"Failed to generate search query: {query_gen_output}")
    return ""
152 |
153 |
def tool_critique_reasoning(reasoning_output: str, prompt: str,
                            previous_critiques: Optional[list] = None) -> str:
    """Ask the critic LLM to evaluate a piece of reasoning against ``prompt``.

    Args:
        reasoning_output: The reasoning text to be critiqued.
        prompt: The research prompt the reasoning is meant to address.
        previous_critiques: Earlier critiques. Only the last
            ``MAX_HISTORY_ITEMS`` are included in the request.

    Returns:
        The stripped ``generated_text`` of the model response, or a fixed
        error sentence when the response is not a dict carrying that key
        (the failure is logged).
    """
    # The default is None rather than [] so that no list object is shared
    # between calls.
    parts = [
        f"Critically evaluate the following reasoning output in relation to the prompt:\n\nPrompt: {prompt}\n\nReasoning: {reasoning_output}\n\n"
    ]

    if previous_critiques:
        parts.append(
            "Previous critiques that should be addressed:\n"
            + "\n".join(previous_critiques[-MAX_HISTORY_ITEMS:])
            + "\n\n"
        )

    parts.append("Identify any flaws, biases, logical fallacies, unsupported claims, or areas for improvement. Be specific and constructive. Suggest concrete ways to enhance the reasoning.")
    critique_input = "".join(parts)

    critique_output = hf_inference(CRITIC_LLM_ENDPOINT, critique_input)

    if isinstance(critique_output, dict) and "generated_text" in critique_output:
        return critique_output["generated_text"].strip()

    logger.error(f"Failed to generate critique: {critique_output}")
    return "Could not generate a critique due to an error."
169 |
170 |
def tool_identify_contradictions(insights: list) -> list:
    """Ask the critic LLM to list contradictions among recent insights.

    Args:
        insights: Insight strings. Only the last ``MAX_HISTORY_ITEMS`` are
            sent to the model.

    Returns:
        A list with one string per numbered point in the model's answer.
        Returns ``[]`` when fewer than two insights are given, when the
        model reports no contradictions, when no numbered point is found,
        or when the response is not a dict carrying ``generated_text``
        (that last case is logged).
    """
    if len(insights) < 2:
        # A contradiction needs at least two statements to compare.
        return []

    contradiction_input = "Identify specific contradictions in these insights:\n\n" + "\n\n".join(insights[-MAX_HISTORY_ITEMS:])
    contradiction_input += "\n\nList each contradiction as a separate numbered point. If no contradictions exist, respond with 'No contradictions found.'"

    contradiction_output = hf_inference(CRITIC_LLM_ENDPOINT, contradiction_input)

    if isinstance(contradiction_output, dict) and "generated_text" in contradiction_output:
        result = contradiction_output["generated_text"].strip()

        # Compare the sentinel ignoring case and the trailing full stop so
        # small formatting differences in the model's reply still match.
        if result.rstrip(".").strip().lower() == "no contradictions found":
            return []

        # A numbered point starts at the beginning of a line. Anchoring both
        # the marker and the lookahead there keeps digits followed by a dot
        # inside the text ("3.5%", "2023.") from cutting an item short.
        contradictions = re.findall(
            r'^[ \t]*\d+\.\s+(.*?)(?=^[ \t]*\d+\.\s|\Z)',
            result,
            re.DOTALL | re.MULTILINE,
        )
        return [c.strip() for c in contradictions if c.strip()]

    logger.error(f"Failed to identify contradictions: {contradiction_output}")
    return []
190 |
191 |
def tool_identify_focus_areas(prompt: str, insights: Optional[list] = None,
                              failed_areas: Optional[list] = None) -> list:
    """Ask the main LLM which aspects of ``prompt`` to investigate next.

    Args:
        prompt: The research prompt.
        insights: Existing insight strings. Only the last three are sent.
        failed_areas: Focus areas presented to the model as not having
            yielded useful results.

    Returns:
        Up to three focus-area strings parsed from the model's answer, or
        ``[]`` when the response is not a dict carrying ``generated_text``
        (the failure is logged).
    """
    # The list defaults are None rather than [] so that no list object is
    # shared between calls.
    focus_input = f"Based on this research prompt: '{prompt}'\n\n"

    if insights:
        focus_input += "And these existing insights:\n" + "\n".join(insights[-3:]) + "\n\n"  # Last 3 insights

    if failed_areas:
        focus_input += f"These focus areas didn't yield useful results: {', '.join(failed_areas)}\n\n"

    focus_input += "Identify 2-3 specific aspects that should be investigated further to get a complete understanding. Be precise and prioritize underexplored areas."

    focus_output = hf_inference(MAIN_LLM_ENDPOINT, focus_input)

    if isinstance(focus_output, dict) and "generated_text" in focus_output:
        result = focus_output["generated_text"].strip()

        # Match each numbered or bulleted line on its own. Parsing per line
        # means an item followed by a blank or continuation line is kept.
        marked = re.findall(r'^[ \t]*(?:\d+\.|\*|\-)[ \t]*(.+?)[ \t]*$', result, re.MULTILINE)
        areas = [area.strip() for area in marked if area.strip()]

        if not areas:
            # No list markers at all: treat every non-empty line as one area.
            areas = [line.strip() for line in result.splitlines() if line.strip()]

        return areas[:3]  # Limit to top 3

    logger.error(f"Failed to identify focus areas: {focus_output}")
    return []
213 |
214 |
def filter_results(search_results, prompt, previous_snippets=None):
215 |
if not main_similarity_model or not search_results:
216 |
return search_results
217 |
218 |
219 |
prompt_embedding = main_similarity_model.encode(prompt, convert_to_tensor=True)
220 |
filtered_results = []
221 |
222 |
# Keep track of snippets we've already seen
223 |
seen_snippets = set()
224 |
if previous_snippets:
225 |
226 |
227 |
for result in search_results:
228 |
combined_text = result['title'] + " " + result['snippet']
229 |
230 |
# Skip if we've seen this exact snippet before
231 |
if result['snippet'] in seen_snippets:
232 |
233 |
234 |
result_embedding = main_similarity_model.encode(combined_text, convert_to_tensor=True)
235 |
cosine_score = util.pytorch_cos_sim(prompt_embedding, result_embedding)[0][0].item()
236 |
237 |
if cosine_score >= SIMILARITY_THRESHOLD:
238 |
result['relevance_score'] = cosine_score
239 |
240 |
241 |
242 |
# Sort by relevance score
243 |
filtered_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
244 |
return filtered_results
@@ -250,15 +251,15 @@ def filter_results(search_results, prompt, previous_snippets=None):
250 |
# New tool: Extract entities for focused research
251 |
def tool_extract_key_entities(prompt: str) -> list:
    """Ask the main LLM for the key entities named in ``prompt``.

    Args:
        prompt: The research prompt to extract entities from.

    Returns:
        Up to five entity names, one per line of the model's answer, with
        leading list markers removed and duplicates dropped (first
        occurrence kept). Returns ``[]`` when the response is not a dict
        carrying ``generated_text`` (the failure is logged).
    """
    entity_input = f"Extract the key entities (people, organizations, concepts, technologies, etc.) from this research prompt that should be investigated individually:\n\n{prompt}\n\nList only the most important 3-5 entities, one per line."

    entity_output = hf_inference(MAIN_LLM_ENDPOINT, entity_input)

    if isinstance(entity_output, dict) and "generated_text" in entity_output:
        result = entity_output["generated_text"].strip()

        entities = []
        for line in result.split('\n'):
            # Remove a leading "1. ", "2) ", "- ", "* " or "• " so the marker
            # does not become part of the entity name. The marker must be
            # followed by whitespace, so names like "3D printing" are kept.
            name = re.sub(r'^\s*(?:\d+[.)]|[-*•])\s+', '', line).strip()
            if name and name not in entities:
                entities.append(name)

        return entities[:5]  # Limit to 5 entities

    logger.error(f"Failed to extract key entities: {entity_output}")
    return []
264 |
@@ -266,20 +267,20 @@ def tool_extract_key_entities(prompt: str) -> list:
266 |
def tool_meta_analyze(entity_insights: Dict[str, list], prompt: str) -> str:
    """Ask the main LLM for a synthesis across per-entity insights.

    Args:
        entity_insights: Mapping of entity name to its list of insight
            strings. Only the latest insight of each entity is sent.
        prompt: The research prompt the entities relate to.

    Returns:
        The stripped ``generated_text`` of the model response; the sentence
        ``"No entity insights to analyze."`` when no entity has an insight;
        or a fixed error sentence when the response is not a dict carrying
        ``generated_text`` (the failure is logged).
    """
    # Build one section per entity that has at least one insight.
    sections = [
        f"\n--- {entity} ---\n" + insights[-1] + "\n"  # Just use the latest insight for each entity
        for entity, insights in (entity_insights or {}).items()
        if insights
    ]

    if not sections:
        # Covers both an empty mapping and a mapping whose lists are all
        # empty, so the model is never called with nothing to analyze.
        return "No entity insights to analyze."

    meta_input = f"Perform a meta-analysis across these different entities related to the prompt: '{prompt}'\n\n"
    meta_input += "".join(sections)
    meta_input += "\nProvide a high-level synthesis that identifies:\n1. Common themes across entities\n2. Important differences\n3. How these entities interact or influence each other\n4. The broader implications for the original research question"

    meta_output = hf_inference(MAIN_LLM_ENDPOINT, meta_input)

    if isinstance(meta_output, dict) and "generated_text" in meta_output:
        return meta_output["generated_text"].strip()

    logger.error(f"Failed to perform meta-analysis: {meta_output}")
    return "Could not generate a meta-analysis due to an error."
285 |
@@ -384,7 +385,7 @@ Available Tools:
384 |
385 |
# Only include most recent context items to avoid exceeding context limits
386 |
recent_context = context[-MAX_CONTEXT_ITEMS:] if len(context) > MAX_CONTEXT_ITEMS else context
387 |
388 |
prompt += "\nContext (most recent items):\n"
389 |
for item in recent_context:
390 |
prompt += f"- {item}\n"
@@ -416,17 +417,17 @@ def deep_research(prompt):
416 |
seen_snippets = set()
417 |
contradictions = []
418 |
research_session_id = str(uuid4())
419 |
420 |
# Start with entity extraction for multi-pronged research
421 |
key_entities = tool_extract_key_entities(prompt=prompt)
422 |
if key_entities:
423 |
context.append(f"Identified key entities: {key_entities}")
424 |
intermediate_output += f"Identified key entities for focused research: {key_entities}\n"
425 |
426 |
# Tracking progress for each entity
427 |
entity_progress = {entity: {'queries': [], 'insights': []} for entity in key_entities}
428 |
entity_progress['general'] = {'queries': [], 'insights': []} # For general research not tied to specific entities
429 |
430 |
for i in range(MAX_ITERATIONS):
431 |
# Decide which entity to focus on this iteration, or general research
432 |
if key_entities and i > 0:
@@ -435,9 +436,9 @@ def deep_research(prompt):
435 |
current_entity = entities_to_process[i % len(entities_to_process)]
436 |
437 |
current_entity = 'general'
438 |
439 |
context.append(f"Current focus: {current_entity}")
440 |
441 |
# First iteration: general query and initial research
442 |
if i == 0:
443 |
initial_query = tool_generate_search_query(prompt=prompt)
@@ -446,10 +447,10 @@ def deep_research(prompt):
446 |
447 |
search_results = tool_search_web(query=initial_query)
448 |
filtered_search_results = filter_results(search_results, prompt)
449 |
450 |
for result in filtered_search_results:
451 |
452 |
453 |
if filtered_search_results:
454 |
context.append(f"Initial Search Results: {len(filtered_search_results)} items found")
455 |
reasoning_output = tool_reason(prompt, filtered_search_results)
@@ -461,7 +462,7 @@ def deep_research(prompt):
461 |
462 |
463 |
context.append(f"Initial query yielded no relevant results: {initial_query}")
464 |
465 |
# Generate current entity-specific query if applicable
466 |
elif current_entity != 'general':
467 |
entity_query = tool_generate_search_query(
@@ -469,24 +470,24 @@ def deep_research(prompt):
469 |
470 |
471 |
472 |
473 |
if entity_query:
474 |
475 |
476 |
477 |
# Search with entity focus
478 |
search_results = tool_search_web(query=entity_query)
479 |
filtered_search_results = filter_results(search_results,
480 |
f"{prompt} {current_entity}",
481 |
482 |
483 |
# Update seen snippets
484 |
for result in filtered_search_results:
485 |
486 |
487 |
if filtered_search_results:
488 |
context.append(f"Entity Search for {current_entity}: {len(filtered_search_results)} results")
489 |
490 |
# Get entity-specific reasoning
491 |
entity_reasoning = tool_reason(
492 |
prompt=f"{prompt} focusing on {current_entity}",
@@ -494,29 +495,29 @@ def deep_research(prompt):
494 |
495 |
496 |
497 |
498 |
if entity_reasoning:
499 |
500 |
501 |
502 |
# Store in entity-specific insights dictionary for meta-analysis
503 |
if current_entity not in entity_specific_insights:
504 |
entity_specific_insights[current_entity] = []
505 |
506 |
507 |
context.append(f"Reasoning about {current_entity}: {entity_reasoning[:200]}...")
508 |
509 |
510 |
context.append(f"Entity query for {current_entity} yielded no relevant results")
511 |
512 |
# Generate LLM decision for next tool
513 |
llm_prompt = create_prompt(task_description, prompt, tools, context)
514 |
llm_response = hf_inference(MAIN_LLM_ENDPOINT, llm_prompt)
515 |
516 |
if isinstance(llm_response, dict) and "error" in llm_response:
517 |
intermediate_output += f"LLM Error: {llm_response['error']}\n"
518 |
519 |
520 |
if not isinstance(llm_response, dict) or "generated_text" not in llm_response:
521 |
intermediate_output += "Error: Invalid LLM response.\n"
522 |
@@ -554,32 +555,32 @@ def deep_research(prompt):
554 |
parameters['failed_queries'] = failed_queries
555 |
parameters['focus_areas'] = focus_areas
556 |
result = tool["function"](**parameters)
557 |
558 |
if current_entity != 'general':
559 |
560 |
561 |
562 |
563 |
elif tool_name == "reason":
564 |
if current_entity != 'general' and 'reasoning_context' not in parameters:
565 |
parameters['reasoning_context'] = entity_progress[current_entity]['insights']
566 |
elif 'reasoning_context' not in parameters:
567 |
parameters['reasoning_context'] = reasoning_context[:]
568 |
569 |
if 'prompt' not in parameters:
570 |
if current_entity != 'general':
571 |
parameters['prompt'] = f"{prompt} focusing on {current_entity}"
572 |
573 |
parameters['prompt'] = prompt
574 |
575 |
if 'search_results' not in parameters:
576 |
parameters['search_results'] = []
577 |
578 |
if 'focus_areas' not in parameters and focus_areas:
579 |
parameters['focus_areas'] = focus_areas
580 |
581 |
result = tool["function"](**parameters)
582 |
583 |
if current_entity != 'general':
584 |
585 |
if current_entity not in entity_specific_insights:
@@ -587,48 +588,48 @@ def deep_research(prompt):
587 |
588 |
589 |
590 |
591 |
592 |
593 |
elif tool_name == "search_web":
594 |
result = tool_search_web(**parameters)
595 |
filtered_result = filter_results(result,
596 |
prompt if current_entity == 'general' else f"{prompt} {current_entity}",
597 |
598 |
599 |
# Update seen snippets
600 |
for r in filtered_result:
601 |
602 |
603 |
result = filtered_result
604 |
605 |
if not result:
606 |
query = parameters.get('query', '')
607 |
if query:
608 |
609 |
610 |
elif tool_name == "critique_reasoning":
611 |
if 'previous_critiques' not in parameters:
612 |
parameters['previous_critiques'] = previous_critiques
613 |
614 |
if all_insights:
615 |
if 'reasoning_output' not in parameters:
616 |
parameters['reasoning_output'] = all_insights[-1]
617 |
if 'prompt' not in parameters:
618 |
parameters['prompt'] = prompt
619 |
620 |
result = tool["function"](**parameters)
621 |
622 |
context.append(f"Critique: {result[:200]}...")
623 |
624 |
result = "No reasoning to critique yet."
625 |
626 |
elif tool_name == "identify_contradictions":
627 |
result = tool["function"](**parameters)
628 |
if result:
629 |
contradictions = result # Store for later use in summarization
630 |
context.append(f"Identified contradictions: {result}")
631 |
632 |
elif tool_name == "identify_focus_areas":
633 |
if 'failed_areas' not in parameters:
634 |
parameters['failed_areas'] = failed_areas
@@ -639,7 +640,7 @@ def deep_research(prompt):
639 |
focus_areas = result
640 |
failed_areas.extend([area for area in old_focus if area not in result])
641 |
context.append(f"New focus areas: {result}")
642 |
643 |
elif tool_name == "meta_analyze":
644 |
if 'entity_insights' not in parameters:
645 |
parameters['entity_insights'] = entity_specific_insights
@@ -649,7 +650,7 @@ def deep_research(prompt):
649 |
if result:
650 |
all_insights.append(result) # Add meta-analysis to insights
651 |
context.append(f"Meta-analysis across entities: {result[:200]}...")
652 |
653 |
654 |
result = tool["function"](**parameters)
655 |
@@ -657,9 +658,9 @@ def deep_research(prompt):
657 |
result_str = str(result)
658 |
if len(result_str) > 500:
659 |
result_str = result_str[:500] + "..."
660 |
661 |
intermediate_output += f"Iteration {i+1} - Result: {result_str}\n"
662 |
663 |
# Add truncated result to context
664 |
result_context = result_str
665 |
if len(result_str) > 300: # Even shorter for context
@@ -669,8 +670,7 @@ def deep_research(prompt):
669 |
except Exception as e:
670 |
logger.error(f"Error with {tool_name}: {str(e)}")
671 |
context.append(f"Error with {tool_name}: {str(e)}")
672 |
intermediate_output += f"Iteration {i+1} - Error: {str(e)}\
673 |
674 |
675 |
# Perform final meta-analysis if we have entity-specific insights
676 |
if len(entity_specific_insights) > 1 and len(all_insights) > 2:
@@ -687,26 +687,26 @@ def deep_research(prompt):
687 |
688 |
# Prepare the full output with detailed tracking
689 |
full_output = f"**Research Prompt:** {prompt}\n\n"
690 |
691 |
if key_entities:
692 |
full_output += f"**Key Entities Identified:** {', '.join(key_entities)}\n\n"
693 |
694 |
full_output += "**Research Process:**\n" + intermediate_output + "\n"
695 |
696 |
if contradictions:
697 |
full_output += "**Contradictions Identified:**\n"
698 |
for i, contradiction in enumerate(contradictions, 1):
699 |
full_output += f"{i}. {contradiction}\n"
700 |
full_output += "\n"
701 |
702 |
full_output += f"**Final Analysis:**\n{final_result}\n\n"
703 |
704 |
# Add session info for potential follow-up
705 |
full_output += f"Research Session ID: {research_session_id}\n"
706 |
full_output += f"Completed at: {'%Y-%m-%d %H:%M:%S')}\n"
707 |
full_output += f"Total iterations: {i+1}\n"
708 |
full_output += f"Total insights generated: {len(all_insights)}\n"
709 |
710 |
return full_output
711 |
712 |
# Create CSS for a more professional look
@@ -750,19 +750,20 @@ iface = gr.Interface(
750 |
["Analyze the environmental and social impacts of lithium mining for electric vehicle batteries."],
751 |
["How has artificial intelligence influenced medical diagnostics in the past five years, and what are the ethical considerations?"]
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
# Add footer with additional information
760 |
footer_html = """
761 |
<div class="footer">
762 |
<p>This research assistant performs advanced multi-stage analysis using natural language processing and web search.</p>
763 |
<p>Results should be verified with additional sources. Not suitable for medical, legal, or emergency use.</p>
764 |
765 |
766 |
767 |
# Launch the interface
768 |
6 |
import re
7 |
from uuid import uuid4
8 |
from datetime import datetime
9 |
from duckduckgo_search import DDGS # Corrected import
10 |
from sentence_transformers import SentenceTransformer, util
11 |
from typing import List, Dict, Any, Optional, Union, Tuple
12 |
import logging
24 |
raise ValueError("Please set the HF_API_KEY environment variable.")
25 |
26 |
# You can use different models for different tasks
27 |
MAIN_LLM_ENDPOINT = "your-main-llm-endpoint" # Replace with your actual endpoint
28 |
REASONING_LLM_ENDPOINT = "your-reasoning-llm-endpoint" # Can be the same as main if needed
29 |
CRITIC_LLM_ENDPOINT = "your-critic-llm-endpoint" # Can be the same as main if needed
30 |
48 |
def hf_inference(endpoint, inputs, parameters=None, retries=5):
49 |
headers = {"Authorization": f"Bearer {HF_API_KEY}"}
50 |
payload = {"inputs": inputs, "parameters": parameters or {}}
51 |
52 |
for attempt in range(retries):
53 |
54 |
response =, headers=headers, json=payload, timeout=TIMEOUT)
61 |
time.sleep(RETRY_DELAY * (1 + attempt)) # Exponential backoff
62 |
return {"error": "Request failed after multiple retries."}
63 |
64 |
def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str = "moderate",
65 |
time_filter: str = "", region: str = "wt-wt", language: str = "en-us") -> list:
66 |
67 |
with DDGS() as ddgs: # Use the DDGS context manager
68 |
results = [r for r in ddgs.text(query, max_results=num_results, safesearch=safesearch,
69 |
time=time_filter, region=region, hreflang=language)] #Simplified call
70 |
if results:
71 |
return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
72 |
73 |
return []
74 |
except Exception as e:
75 |
logger.error(f"DuckDuckGo search error: {e}")
76 |
return []
77 |
78 |
def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
79 |
critique: str = "", focus_areas: list = []) -> str:
80 |
if not search_results:
81 |
return "No search results to reason about."
82 |
83 |
reasoning_input = "Reason about the following search results in relation to the prompt:\n\n"
84 |
reasoning_input += f"Prompt: {prompt}\n\n"
85 |
86 |
if focus_areas:
87 |
reasoning_input += f"Focus particularly on these aspects: {', '.join(focus_areas)}\n\n"
88 |
89 |
for i, result in enumerate(search_results):
90 |
reasoning_input += f"- Result {i + 1}: Title: {result['title']}, Snippet: {result['snippet']}\n"
91 |
92 |
if reasoning_context:
93 |
recent_context = reasoning_context[-MAX_HISTORY_ITEMS:]
94 |
reasoning_input += "\nPrevious Reasoning Context:\n" + "\n".join(recent_context)
95 |
96 |
if critique:
97 |
reasoning_input += f"\n\nRecent critique to address: {critique}\n"
98 |
99 |
reasoning_input += "\nProvide a thorough, nuanced analysis that builds upon previous reasoning if applicable. Consider multiple perspectives and potential contradictions in the search results."
100 |
101 |
reasoning_output = hf_inference(REASONING_LLM_ENDPOINT, reasoning_input)
109 |
def tool_summarize(insights: list, prompt: str, contradictions: list = []) -> str:
110 |
if not insights:
111 |
return "No insights to summarize."
112 |
113 |
summarization_input = f"Synthesize the following insights into a cohesive and comprehensive summary regarding: '{prompt}'\n\n"
114 |
summarization_input += "\n\n".join(insights[-MAX_HISTORY_ITEMS:]) # Only use most recent insights
115 |
116 |
if contradictions:
117 |
summarization_input += "\n\nAddress these specific contradictions:\n" + "\n".join(contradictions)
118 |
119 |
summarization_input += "\n\nProvide a well-structured summary that:\n1. Presents the main findings\n2. Acknowledges limitations and uncertainties\n3. Highlights areas of consensus and disagreement\n4. Suggests potential directions for further inquiry"
120 |
121 |
summarization_output = hf_inference(MAIN_LLM_ENDPOINT, summarization_input)
122 |
123 |
if isinstance(summarization_output, dict) and "generated_text" in summarization_output:
126 |
logger.error(f"Failed to generate summary: {summarization_output}")
127 |
return "Could not generate a summary due to an error."
128 |
129 |
def tool_generate_search_query(prompt: str, previous_queries: Optional[list] = None,
                               failed_queries: Optional[list] = None,
                               focus_areas: Optional[list] = None) -> str:
    """Ask the main LLM to propose a web-search query for ``prompt``.

    Args:
        prompt: The research prompt the query should serve.
        previous_queries: Queries already issued. Only the last
            ``MAX_HISTORY_ITEMS`` entries are shown to the model.
        failed_queries: Queries presented to the model as not having
            yielded useful results.
        focus_areas: Aspects the model is asked to concentrate on.

    Returns:
        The stripped ``generated_text`` of the model response, or ``""``
        when the response is not a dict carrying that key (the failure
        is logged).
    """
    # The list defaults are None rather than [] so that no list object is
    # shared between calls.
    parts = [f"Generate an effective search query for the following prompt: {prompt}\n"]

    if previous_queries:
        recent_queries = previous_queries[-MAX_HISTORY_ITEMS:]
        parts.append("Previous search queries:\n" + "\n".join(recent_queries) + "\n")

    if failed_queries:
        parts.append("These queries didn't yield useful results:\n" + "\n".join(failed_queries) + "\n")

    if focus_areas:
        parts.append(f"Focus particularly on these aspects: {', '.join(focus_areas)}\n")

    parts.append("Refine the search query based on previous queries, aiming for more precise results.\n")
    parts.append("Search Query:")
    query_gen_input = "".join(parts)

    query_gen_output = hf_inference(MAIN_LLM_ENDPOINT, query_gen_input)

    if isinstance(query_gen_output, dict) and 'generated_text' in query_gen_output:
        return query_gen_output['generated_text'].strip()

    logger.error(f"Failed to generate search query: {query_gen_output}")
    return ""
153 |
154 |
def tool_critique_reasoning(reasoning_output: str, prompt: str,
                            previous_critiques: Optional[list] = None) -> str:
    """Ask the critic LLM to evaluate a piece of reasoning against ``prompt``.

    Args:
        reasoning_output: The reasoning text to be critiqued.
        prompt: The research prompt the reasoning is meant to address.
        previous_critiques: Earlier critiques. Only the last
            ``MAX_HISTORY_ITEMS`` are included in the request.

    Returns:
        The stripped ``generated_text`` of the model response, or a fixed
        error sentence when the response is not a dict carrying that key
        (the failure is logged).
    """
    # The default is None rather than [] so that no list object is shared
    # between calls.
    parts = [
        f"Critically evaluate the following reasoning output in relation to the prompt:\n\nPrompt: {prompt}\n\nReasoning: {reasoning_output}\n\n"
    ]

    if previous_critiques:
        parts.append(
            "Previous critiques that should be addressed:\n"
            + "\n".join(previous_critiques[-MAX_HISTORY_ITEMS:])
            + "\n\n"
        )

    parts.append("Identify any flaws, biases, logical fallacies, unsupported claims, or areas for improvement. Be specific and constructive. Suggest concrete ways to enhance the reasoning.")
    critique_input = "".join(parts)

    critique_output = hf_inference(CRITIC_LLM_ENDPOINT, critique_input)

    if isinstance(critique_output, dict) and "generated_text" in critique_output:
        return critique_output["generated_text"].strip()

    logger.error(f"Failed to generate critique: {critique_output}")
    return "Could not generate a critique due to an error."
170 |
171 |
def tool_identify_contradictions(insights: list) -> list:
    """Ask the critic LLM to list contradictions among recent insights.

    Args:
        insights: Insight strings. Only the last ``MAX_HISTORY_ITEMS`` are
            sent to the model.

    Returns:
        A list with one string per numbered point in the model's answer.
        Returns ``[]`` when fewer than two insights are given, when the
        model reports no contradictions, when no numbered point is found,
        or when the response is not a dict carrying ``generated_text``
        (that last case is logged).
    """
    if len(insights) < 2:
        # A contradiction needs at least two statements to compare.
        return []

    contradiction_input = "Identify specific contradictions in these insights:\n\n" + "\n\n".join(insights[-MAX_HISTORY_ITEMS:])
    contradiction_input += "\n\nList each contradiction as a separate numbered point. If no contradictions exist, respond with 'No contradictions found.'"

    contradiction_output = hf_inference(CRITIC_LLM_ENDPOINT, contradiction_input)

    if isinstance(contradiction_output, dict) and "generated_text" in contradiction_output:
        result = contradiction_output["generated_text"].strip()

        # Compare the sentinel ignoring case and the trailing full stop so
        # small formatting differences in the model's reply still match.
        if result.rstrip(".").strip().lower() == "no contradictions found":
            return []

        # A numbered point starts at the beginning of a line. Anchoring both
        # the marker and the lookahead there keeps digits followed by a dot
        # inside the text ("3.5%", "2023.") from cutting an item short.
        contradictions = re.findall(
            r'^[ \t]*\d+\.\s+(.*?)(?=^[ \t]*\d+\.\s|\Z)',
            result,
            re.DOTALL | re.MULTILINE,
        )
        return [c.strip() for c in contradictions if c.strip()]

    logger.error(f"Failed to identify contradictions: {contradiction_output}")
    return []
191 |
192 |
def tool_identify_focus_areas(prompt: str, insights: Optional[list] = None,
                              failed_areas: Optional[list] = None) -> list:
    """Ask the main LLM which aspects of ``prompt`` to investigate next.

    Args:
        prompt: The research prompt.
        insights: Existing insight strings. Only the last three are sent.
        failed_areas: Focus areas presented to the model as not having
            yielded useful results.

    Returns:
        Up to three focus-area strings parsed from the model's answer, or
        ``[]`` when the response is not a dict carrying ``generated_text``
        (the failure is logged).
    """
    # The list defaults are None rather than [] so that no list object is
    # shared between calls.
    focus_input = f"Based on this research prompt: '{prompt}'\n\n"

    if insights:
        focus_input += "And these existing insights:\n" + "\n".join(insights[-3:]) + "\n\n"  # Last 3 insights

    if failed_areas:
        focus_input += f"These focus areas didn't yield useful results: {', '.join(failed_areas)}\n\n"

    focus_input += "Identify 2-3 specific aspects that should be investigated further to get a complete understanding. Be precise and prioritize underexplored areas."

    focus_output = hf_inference(MAIN_LLM_ENDPOINT, focus_input)

    if isinstance(focus_output, dict) and "generated_text" in focus_output:
        result = focus_output["generated_text"].strip()

        # Match each numbered or bulleted line on its own. Parsing per line
        # means an item followed by a blank or continuation line is kept.
        marked = re.findall(r'^[ \t]*(?:\d+\.|\*|\-)[ \t]*(.+?)[ \t]*$', result, re.MULTILINE)
        areas = [area.strip() for area in marked if area.strip()]

        if not areas:
            # No list markers at all: treat every non-empty line as one area.
            areas = [line.strip() for line in result.splitlines() if line.strip()]

        return areas[:3]  # Limit to top 3

    logger.error(f"Failed to identify focus areas: {focus_output}")
    return []
214 |
215 |
def filter_results(search_results, prompt, previous_snippets=None):
216 |
if not main_similarity_model or not search_results:
217 |
return search_results
218 |
219 |
220 |
prompt_embedding = main_similarity_model.encode(prompt, convert_to_tensor=True)
221 |
filtered_results = []
222 |
223 |
# Keep track of snippets we've already seen
224 |
seen_snippets = set()
225 |
if previous_snippets:
226 |
227 |
228 |
for result in search_results:
229 |
combined_text = result['title'] + " " + result['snippet']
230 |
231 |
# Skip if we've seen this exact snippet before
232 |
if result['snippet'] in seen_snippets:
233 |
234 |
235 |
result_embedding = main_similarity_model.encode(combined_text, convert_to_tensor=True)
236 |
cosine_score = util.pytorch_cos_sim(prompt_embedding, result_embedding)[0][0].item()
237 |
238 |
if cosine_score >= SIMILARITY_THRESHOLD:
239 |
result['relevance_score'] = cosine_score
240 |
241 |
242 |
243 |
# Sort by relevance score
244 |
filtered_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
245 |
return filtered_results
251 |
# New tool: Extract entities for focused research
252 |
def tool_extract_key_entities(prompt: str) -> list:
    """Ask the main LLM for the key entities named in ``prompt``.

    Args:
        prompt: The research prompt to extract entities from.

    Returns:
        Up to five entity names, one per line of the model's answer, with
        leading list markers removed and duplicates dropped (first
        occurrence kept). Returns ``[]`` when the response is not a dict
        carrying ``generated_text`` (the failure is logged).
    """
    entity_input = f"Extract the key entities (people, organizations, concepts, technologies, etc.) from this research prompt that should be investigated individually:\n\n{prompt}\n\nList only the most important 3-5 entities, one per line."

    entity_output = hf_inference(MAIN_LLM_ENDPOINT, entity_input)

    if isinstance(entity_output, dict) and "generated_text" in entity_output:
        result = entity_output["generated_text"].strip()

        entities = []
        for line in result.split('\n'):
            # Remove a leading "1. ", "2) ", "- ", "* " or "• " so the marker
            # does not become part of the entity name. The marker must be
            # followed by whitespace, so names like "3D printing" are kept.
            name = re.sub(r'^\s*(?:\d+[.)]|[-*•])\s+', '', line).strip()
            if name and name not in entities:
                entities.append(name)

        return entities[:5]  # Limit to 5 entities

    logger.error(f"Failed to extract key entities: {entity_output}")
    return []
265 |
267 |
def tool_meta_analyze(entity_insights: Dict[str, list], prompt: str) -> str:
    """Synthesize the latest insight of every entity into one meta-analysis.

    Only the most recent insight of each entity is sent to the main LLM;
    entities whose insight list is empty are skipped. If no entity has any
    insight at all, no inference call is made.

    Args:
        entity_insights: Mapping of entity name to its list of insights,
            oldest first.
        prompt: The original research prompt, quoted in the LLM request.

    Returns:
        The stripped ``generated_text`` of the LLM reply;
        ``"No entity insights to analyze."`` when there is nothing to
        analyse; or a fixed error sentence when the reply is not a dict
        containing ``generated_text`` (the failure is also logged).
    """
    # Just use the latest insight for each entity. An entity with an empty
    # list contributes nothing, so a mapping made only of empty lists is
    # treated like an empty mapping instead of prompting the LLM without
    # any evidence.
    latest_insights = {
        entity: insights[-1]
        for entity, insights in (entity_insights or {}).items()
        if insights
    }
    if not latest_insights:
        return "No entity insights to analyze."

    sections = "".join(
        f"\n--- {entity} ---\n{insight}\n"
        for entity, insight in latest_insights.items()
    )
    meta_input = (
        f"Perform a meta-analysis across these different entities related to the prompt: '{prompt}'\n\n"
        + sections
        + "\nProvide a high-level synthesis that identifies:\n1. Common themes across entities\n2. Important differences\n3. How these entities interact or influence each other\n4. The broader implications for the original research question"
    )

    meta_output = hf_inference(MAIN_LLM_ENDPOINT, meta_input)

    if isinstance(meta_output, dict) and "generated_text" in meta_output:
        return meta_output["generated_text"].strip()

    logger.error(f"Failed to perform meta-analysis: {meta_output}")
    return "Could not generate a meta-analysis due to an error."
286 |
385 |
386 |
# Only include most recent context items to avoid exceeding context limits
387 |
recent_context = context[-MAX_CONTEXT_ITEMS:] if len(context) > MAX_CONTEXT_ITEMS else context
388 |
389 |
prompt += "\nContext (most recent items):\n"
390 |
for item in recent_context:
391 |
prompt += f"- {item}\n"
417 |
seen_snippets = set()
418 |
contradictions = []
419 |
research_session_id = str(uuid4())
420 |
421 |
# Start with entity extraction for multi-pronged research
422 |
key_entities = tool_extract_key_entities(prompt=prompt)
423 |
if key_entities:
424 |
context.append(f"Identified key entities: {key_entities}")
425 |
intermediate_output += f"Identified key entities for focused research: {key_entities}\n"
426 |
427 |
# Tracking progress for each entity
428 |
entity_progress = {entity: {'queries': [], 'insights': []} for entity in key_entities}
429 |
entity_progress['general'] = {'queries': [], 'insights': []} # For general research not tied to specific entities
430 |
431 |
for i in range(MAX_ITERATIONS):
432 |
# Decide which entity to focus on this iteration, or general research
433 |
if key_entities and i > 0:
436 |
current_entity = entities_to_process[i % len(entities_to_process)]
437 |
438 |
current_entity = 'general'
439 |
440 |
context.append(f"Current focus: {current_entity}")
441 |
442 |
# First iteration: general query and initial research
443 |
if i == 0:
444 |
initial_query = tool_generate_search_query(prompt=prompt)
447 |
448 |
search_results = tool_search_web(query=initial_query)
449 |
filtered_search_results = filter_results(search_results, prompt)
450 |
451 |
for result in filtered_search_results:
452 |
453 |
454 |
if filtered_search_results:
455 |
context.append(f"Initial Search Results: {len(filtered_search_results)} items found")
456 |
reasoning_output = tool_reason(prompt, filtered_search_results)
462 |
463 |
464 |
context.append(f"Initial query yielded no relevant results: {initial_query}")
465 |
466 |
# Generate current entity-specific query if applicable
467 |
elif current_entity != 'general':
468 |
entity_query = tool_generate_search_query(
470 |
471 |
472 |
473 |
474 |
if entity_query:
475 |
476 |
477 |
478 |
# Search with entity focus
479 |
search_results = tool_search_web(query=entity_query)
480 |
filtered_search_results = filter_results(search_results,
481 |
f"{prompt} {current_entity}",
482 |
483 |
484 |
# Update seen snippets
485 |
for result in filtered_search_results:
486 |
487 |
488 |
if filtered_search_results:
489 |
context.append(f"Entity Search for {current_entity}: {len(filtered_search_results)} results")
490 |
491 |
# Get entity-specific reasoning
492 |
entity_reasoning = tool_reason(
493 |
prompt=f"{prompt} focusing on {current_entity}",
495 |
496 |
497 |
498 |
499 |
if entity_reasoning:
500 |
501 |
502 |
503 |
# Store in entity-specific insights dictionary for meta-analysis
504 |
if current_entity not in entity_specific_insights:
505 |
entity_specific_insights[current_entity] = []
506 |
507 |
508 |
context.append(f"Reasoning about {current_entity}: {entity_reasoning[:200]}...")
509 |
510 |
511 |
context.append(f"Entity query for {current_entity} yielded no relevant results")
512 |
513 |
# Generate LLM decision for next tool
514 |
llm_prompt = create_prompt(task_description, prompt, tools, context)
515 |
llm_response = hf_inference(MAIN_LLM_ENDPOINT, llm_prompt)
516 |
517 |
if isinstance(llm_response, dict) and "error" in llm_response:
518 |
intermediate_output += f"LLM Error: {llm_response['error']}\n"
519 |
520 |
521 |
if not isinstance(llm_response, dict) or "generated_text" not in llm_response:
522 |
intermediate_output += "Error: Invalid LLM response.\n"
523 |
555 |
parameters['failed_queries'] = failed_queries
556 |
parameters['focus_areas'] = focus_areas
557 |
result = tool["function"](**parameters)
558 |
559 |
if current_entity != 'general':
560 |
561 |
562 |
563 |
564 |
elif tool_name == "reason":
565 |
if current_entity != 'general' and 'reasoning_context' not in parameters:
566 |
parameters['reasoning_context'] = entity_progress[current_entity]['insights']
567 |
elif 'reasoning_context' not in parameters:
568 |
parameters['reasoning_context'] = reasoning_context[:]
569 |
570 |
if 'prompt' not in parameters:
571 |
if current_entity != 'general':
572 |
parameters['prompt'] = f"{prompt} focusing on {current_entity}"
573 |
574 |
parameters['prompt'] = prompt
575 |
576 |
if 'search_results' not in parameters:
577 |
parameters['search_results'] = []
578 |
579 |
if 'focus_areas' not in parameters and focus_areas:
580 |
parameters['focus_areas'] = focus_areas
581 |
582 |
result = tool["function"](**parameters)
583 |
584 |
if current_entity != 'general':
585 |
586 |
if current_entity not in entity_specific_insights:
588 |
589 |
590 |
591 |
592 |
593 |
594 |
elif tool_name == "search_web":
595 |
result = tool_search_web(**parameters)
596 |
filtered_result = filter_results(result,
597 |
prompt if current_entity == 'general' else f"{prompt} {current_entity}",
598 |
599 |
600 |
# Update seen snippets
601 |
for r in filtered_result:
602 |
603 |
604 |
result = filtered_result
605 |
606 |
if not result:
607 |
query = parameters.get('query', '')
608 |
if query:
609 |
610 |
611 |
elif tool_name == "critique_reasoning":
612 |
if 'previous_critiques' not in parameters:
613 |
parameters['previous_critiques'] = previous_critiques
614 |
615 |
if all_insights:
616 |
if 'reasoning_output' not in parameters:
617 |
parameters['reasoning_output'] = all_insights[-1]
618 |
if 'prompt' not in parameters:
619 |
parameters['prompt'] = prompt
620 |
621 |
result = tool["function"](**parameters)
622 |
623 |
context.append(f"Critique: {result[:200]}...")
624 |
625 |
result = "No reasoning to critique yet."
626 |
627 |
elif tool_name == "identify_contradictions":
628 |
result = tool["function"](**parameters)
629 |
if result:
630 |
contradictions = result # Store for later use in summarization
631 |
context.append(f"Identified contradictions: {result}")
632 |
633 |
elif tool_name == "identify_focus_areas":
634 |
if 'failed_areas' not in parameters:
635 |
parameters['failed_areas'] = failed_areas
640 |
focus_areas = result
641 |
failed_areas.extend([area for area in old_focus if area not in result])
642 |
context.append(f"New focus areas: {result}")
643 |
644 |
elif tool_name == "meta_analyze":
645 |
if 'entity_insights' not in parameters:
646 |
parameters['entity_insights'] = entity_specific_insights
650 |
if result:
651 |
all_insights.append(result) # Add meta-analysis to insights
652 |
context.append(f"Meta-analysis across entities: {result[:200]}...")
653 |
654 |
655 |
result = tool["function"](**parameters)
656 |
658 |
result_str = str(result)
659 |
if len(result_str) > 500:
660 |
result_str = result_str[:500] + "..."
661 |
662 |
intermediate_output += f"Iteration {i+1} - Result: {result_str}\n"
663 |
664 |
# Add truncated result to context
665 |
result_context = result_str
666 |
if len(result_str) > 300: # Even shorter for context
670 |
except Exception as e:
671 |
logger.error(f"Error with {tool_name}: {str(e)}")
672 |
context.append(f"Error with {tool_name}: {str(e)}")
673 |
intermediate_output += f"Iteration {i+1} - Error: {str(e)}\ continue
674 |
675 |
# Perform final meta-analysis if we have entity-specific insights
676 |
if len(entity_specific_insights) > 1 and len(all_insights) > 2:
687 |
688 |
# Prepare the full output with detailed tracking
689 |
full_output = f"**Research Prompt:** {prompt}\n\n"
690 |
691 |
if key_entities:
692 |
full_output += f"**Key Entities Identified:** {', '.join(key_entities)}\n\n"
693 |
694 |
full_output += "**Research Process:**\n" + intermediate_output + "\n"
695 |
696 |
if contradictions:
697 |
full_output += "**Contradictions Identified:**\n"
698 |
for i, contradiction in enumerate(contradictions, 1):
699 |
full_output += f"{i}. {contradiction}\n"
700 |
full_output += "\n"
701 |
702 |
full_output += f"**Final Analysis:**\n{final_result}\n\n"
703 |
704 |
# Add session info for potential follow-up
705 |
full_output += f"Research Session ID: {research_session_id}\n"
706 |
full_output += f"Completed at: {'%Y-%m-%d %H:%M:%S')}\n"
707 |
full_output += f"Total iterations: {i+1}\n"
708 |
full_output += f"Total insights generated: {len(all_insights)}\n"
709 |
710 |
return full_output
711 |
712 |
# Create CSS for a more professional look
750 |
["Analyze the environmental and social impacts of lithium mining for electric vehicle batteries."],
751 |
["How has artificial intelligence influenced medical diagnostics in the past five years, and what are the ethical considerations?"]
752 |
753 |
theme="default", # gr.themes.Base() is more explicit, but "default" also works
754 |
755 |
756 |
757 |
758 |
759 |
# Add footer with additional information (Optional, good for context)
760 |
footer_html = """
761 |
<div class="footer">
762 |
<p>This research assistant performs advanced multi-stage analysis using natural language processing and web search.</p>
763 |
<p>Results should be verified with additional sources. Not suitable for medical, legal, or emergency use.</p>
764 |
765 |
766 |
#iface = iface.add_html(footer_html) #gr.Interface object has no attribute add_html
767 |
768 |
# Launch the interface
769 |