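"""Advanced multi-stage research assistant (Gradio app).

Iteratively searches the web via DuckDuckGo, reasons over the results with hosted
LLM endpoints (Hugging Face Inference API), critiques and refines its own findings,
and synthesizes a final report. Requires the HF_API_KEY environment variable and
the endpoint URLs configured below.
"""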
import gradio as gr
import requests
import os
import time
import json
import re
from uuid import uuid4
from datetime import datetime
from duckduckgo_search import DDGS  # Corrected import
from sentence_transformers import SentenceTransformer, util
from typing import Dict
import logging
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration
HF_API_KEY = os.environ.get("HF_API_KEY")
if not HF_API_KEY:
    raise ValueError("Please set the HF_API_KEY environment variable.")

# You can use different models for different tasks
MAIN_LLM_ENDPOINT = "your-main-llm-endpoint"  # Replace with your actual endpoint
REASONING_LLM_ENDPOINT = "your-reasoning-llm-endpoint"  # Can be the same as main if needed
CRITIC_LLM_ENDPOINT = "your-critic-llm-endpoint"  # Can be the same as main if needed
MAX_ITERATIONS = 12  # Increased from 7
TIMEOUT = 60  # Seconds per HTTP request
RETRY_DELAY = 5  # Base delay between retries, in seconds
NUM_RESULTS = 10  # Increased from 7
SIMILARITY_THRESHOLD = 0.15  # Lowered from 0.2 to admit more potentially relevant results
MAX_CONTEXT_ITEMS = 20  # Prevent the context from growing too large
MAX_HISTORY_ITEMS = 5  # For keeping track of previous queries/reasoning
# Load multiple embedding models for different purposes
try:
    main_similarity_model = SentenceTransformer('all-mpnet-base-v2')
    concept_similarity_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')  # Faster, lighter model for concept matching
except Exception as e:
    logger.error(f"Failed to load SentenceTransformer models: {e}")
    main_similarity_model = None
    concept_similarity_model = None
def hf_inference(endpoint, inputs, parameters=None, retries=5):
    """Call a Hugging Face Inference endpoint with retries and exponential backoff."""
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    payload = {"inputs": inputs, "parameters": parameters or {}}
    for attempt in range(retries):
        try:
            response = requests.post(endpoint, headers=headers, json=payload, timeout=TIMEOUT)
            response.raise_for_status()
            data = response.json()
            # Text-generation endpoints typically return a list like
            # [{"generated_text": ...}]; normalize to a single dict so callers
            # can check for "generated_text" uniformly.
            if isinstance(data, list) and data and isinstance(data[0], dict):
                return data[0]
            return data
        except requests.exceptions.RequestException as e:
            if attempt == retries - 1:
                logger.error(f"Request failed after {retries} retries: {e}")
                return {"error": f"Request failed after {retries} retries: {e}"}
            time.sleep(RETRY_DELAY * (2 ** attempt))  # Exponential backoff
    return {"error": "Request failed after multiple retries."}
def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str = "moderate",
                    time_filter: str = "", region: str = "wt-wt", language: str = "en-us") -> list:
    """Search DuckDuckGo and return a list of {title, snippet, url} dicts.

    Note: DDGS.text() accepts region/safesearch/timelimit/max_results; it has no
    language parameter, so `language` is kept only for interface compatibility.
    """
    try:
        with DDGS() as ddgs:  # Use the DDGS context manager
            results = list(ddgs.text(query, region=region, safesearch=safesearch,
                                     timelimit=time_filter or None, max_results=num_results))
        if results:
            return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
        return []
    except Exception as e:
        logger.error(f"DuckDuckGo search error: {e}")
        return []
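# Example (live call; results vary):
#   tool_search_web("quantum error correction surface codes", num_results=5, time_filter="y")
#   -> [{"title": "...", "snippet": "...", "url": "https://..."}, ...]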
def tool_reason(prompt: str, search_results: list, reasoning_context: list = None,
                critique: str = "", focus_areas: list = None) -> str:
    # Avoid mutable default arguments
    reasoning_context = reasoning_context or []
    focus_areas = focus_areas or []
    if not search_results:
        return "No search results to reason about."
    reasoning_input = "Reason about the following search results in relation to the prompt:\n\n"
    reasoning_input += f"Prompt: {prompt}\n\n"
    if focus_areas:
        reasoning_input += f"Focus particularly on these aspects: {', '.join(focus_areas)}\n\n"
    for i, result in enumerate(search_results):
        reasoning_input += f"- Result {i + 1}: Title: {result['title']}, Snippet: {result['snippet']}\n"
    if reasoning_context:
        recent_context = reasoning_context[-MAX_HISTORY_ITEMS:]
        reasoning_input += "\nPrevious Reasoning Context:\n" + "\n".join(recent_context)
    if critique:
        reasoning_input += f"\n\nRecent critique to address: {critique}\n"
    reasoning_input += "\nProvide a thorough, nuanced analysis that builds upon previous reasoning if applicable. Consider multiple perspectives and potential contradictions in the search results."
    reasoning_output = hf_inference(REASONING_LLM_ENDPOINT, reasoning_input)
    if isinstance(reasoning_output, dict) and "generated_text" in reasoning_output:
        return reasoning_output["generated_text"].strip()
    logger.error(f"Failed to generate reasoning: {reasoning_output}")
    return "Could not generate reasoning due to an error."
def tool_summarize(insights: list, prompt: str, contradictions: list = None) -> str:
    contradictions = contradictions or []  # Avoid a mutable default argument
    if not insights:
        return "No insights to summarize."
    summarization_input = f"Synthesize the following insights into a cohesive and comprehensive summary regarding: '{prompt}'\n\n"
    summarization_input += "\n\n".join(insights[-MAX_HISTORY_ITEMS:])  # Only use the most recent insights
    if contradictions:
        summarization_input += "\n\nAddress these specific contradictions:\n" + "\n".join(contradictions)
    summarization_input += "\n\nProvide a well-structured summary that:\n1. Presents the main findings\n2. Acknowledges limitations and uncertainties\n3. Highlights areas of consensus and disagreement\n4. Suggests potential directions for further inquiry"
    summarization_output = hf_inference(MAIN_LLM_ENDPOINT, summarization_input)
    if isinstance(summarization_output, dict) and "generated_text" in summarization_output:
        return summarization_output["generated_text"].strip()
    logger.error(f"Failed to generate summary: {summarization_output}")
    return "Could not generate a summary due to an error."
def tool_generate_search_query(prompt: str, previous_queries: list = None,
                               failed_queries: list = None, focus_areas: list = None) -> str:
    # Avoid mutable default arguments
    previous_queries = previous_queries or []
    failed_queries = failed_queries or []
    focus_areas = focus_areas or []
    query_gen_input = f"Generate an effective search query for the following prompt: {prompt}\n"
    if previous_queries:
        recent_queries = previous_queries[-MAX_HISTORY_ITEMS:]
        query_gen_input += "Previous search queries:\n" + "\n".join(recent_queries) + "\n"
    if failed_queries:
        query_gen_input += "These queries didn't yield useful results:\n" + "\n".join(failed_queries) + "\n"
    if focus_areas:
        query_gen_input += f"Focus particularly on these aspects: {', '.join(focus_areas)}\n"
    query_gen_input += "Refine the search query based on previous queries, aiming for more precise results.\n"
    query_gen_input += "Search Query:"
    query_gen_output = hf_inference(MAIN_LLM_ENDPOINT, query_gen_input)
    if isinstance(query_gen_output, dict) and 'generated_text' in query_gen_output:
        return query_gen_output['generated_text'].strip()
    logger.error(f"Failed to generate search query: {query_gen_output}")
    return ""
def tool_critique_reasoning(reasoning_output: str, prompt: str,
                            previous_critiques: list = None) -> str:
    previous_critiques = previous_critiques or []  # Avoid a mutable default argument
    critique_input = f"Critically evaluate the following reasoning output in relation to the prompt:\n\nPrompt: {prompt}\n\nReasoning: {reasoning_output}\n\n"
    if previous_critiques:
        critique_input += "Previous critiques that should be addressed:\n" + "\n".join(previous_critiques[-MAX_HISTORY_ITEMS:]) + "\n\n"
    critique_input += "Identify any flaws, biases, logical fallacies, unsupported claims, or areas for improvement. Be specific and constructive. Suggest concrete ways to enhance the reasoning."
    critique_output = hf_inference(CRITIC_LLM_ENDPOINT, critique_input)
    if isinstance(critique_output, dict) and "generated_text" in critique_output:
        return critique_output["generated_text"].strip()
    logger.error(f"Failed to generate critique: {critique_output}")
    return "Could not generate a critique due to an error."
def tool_identify_contradictions(insights: list) -> list:
    if len(insights) < 2:
        return []
    contradiction_input = "Identify specific contradictions in these insights:\n\n" + "\n\n".join(insights[-MAX_HISTORY_ITEMS:])
    contradiction_input += "\n\nList each contradiction as a separate numbered point. If no contradictions exist, respond with 'No contradictions found.'"
    contradiction_output = hf_inference(CRITIC_LLM_ENDPOINT, contradiction_input)
    if isinstance(contradiction_output, dict) and "generated_text" in contradiction_output:
        result = contradiction_output["generated_text"].strip()
        # Substring check: an exact match is brittle against minor LLM phrasing variations
        if "No contradictions found" in result:
            return []
        # Extract numbered contradictions
        contradictions = re.findall(r'\d+\.\s+(.*?)(?=\d+\.|$)', result, re.DOTALL)
        return [c.strip() for c in contradictions if c.strip()]
    logger.error(f"Failed to identify contradictions: {contradiction_output}")
    return []
def tool_identify_focus_areas(prompt: str, insights: list = None,
                              failed_areas: list = None) -> list:
    # Avoid mutable default arguments
    insights = insights or []
    failed_areas = failed_areas or []
    focus_input = f"Based on this research prompt: '{prompt}'\n\n"
    if insights:
        focus_input += "And these existing insights:\n" + "\n".join(insights[-3:]) + "\n\n"  # Last 3 insights
    if failed_areas:
        focus_input += f"These focus areas didn't yield useful results: {', '.join(failed_areas)}\n\n"
    focus_input += "Identify 2-3 specific aspects that should be investigated further to get a complete understanding. Be precise and prioritize underexplored areas."
    focus_output = hf_inference(MAIN_LLM_ENDPOINT, focus_input)
    if isinstance(focus_output, dict) and "generated_text" in focus_output:
        result = focus_output["generated_text"].strip()
        # Extract areas, assuming they're listed with numbers, bullets, or on separate lines
        areas = re.findall(r'(?:^|\n)(?:\d+\.|\*|\-)\s*(.*?)(?=(?:\n(?:\d+\.|\*|\-|$))|$)', result)
        return [area.strip() for area in areas if area.strip()][:3]  # Limit to the top 3
    logger.error(f"Failed to identify focus areas: {focus_output}")
    return []
def filter_results(search_results, prompt, previous_snippets=None):
    if not main_similarity_model or not search_results:
        return search_results
    try:
        prompt_embedding = main_similarity_model.encode(prompt, convert_to_tensor=True)
        filtered_results = []
        # Keep track of snippets we've already seen
        seen_snippets = set()
        if previous_snippets:
            seen_snippets.update(previous_snippets)
        for result in search_results:
            combined_text = result['title'] + " " + result['snippet']
            # Skip if we've seen this exact snippet before
            if result['snippet'] in seen_snippets:
                continue
            result_embedding = main_similarity_model.encode(combined_text, convert_to_tensor=True)
            cosine_score = util.pytorch_cos_sim(prompt_embedding, result_embedding)[0][0].item()
            if cosine_score >= SIMILARITY_THRESHOLD:
                result['relevance_score'] = cosine_score
                filtered_results.append(result)
                seen_snippets.add(result['snippet'])
        # Sort by relevance score
        filtered_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
        return filtered_results
    except Exception as e:
        logger.error(f"Error during filtering: {e}")
        return search_results
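# Behavior sketch (illustrative numbers): with SIMILARITY_THRESHOLD = 0.15, a result whose
# title+snippet embedding scores cosine similarity 0.42 against the prompt is kept with
# relevance_score=0.42; anything below 0.15, or any snippet already seen, is dropped.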
# New tool: Extract entities for focused research
def tool_extract_key_entities(prompt: str) -> list:
    entity_input = f"Extract the key entities (people, organizations, concepts, technologies, etc.) from this research prompt that should be investigated individually:\n\n{prompt}\n\nList only the most important 3-5 entities, one per line."
    entity_output = hf_inference(MAIN_LLM_ENDPOINT, entity_input)
    if isinstance(entity_output, dict) and "generated_text" in entity_output:
        result = entity_output["generated_text"].strip()
        # Split by lines and clean up
        entities = [e.strip() for e in result.split('\n') if e.strip()]
        return entities[:5]  # Limit to 5 entities
    logger.error(f"Failed to extract key entities: {entity_output}")
    return []
# New tool: Meta-analyze across entities
def tool_meta_analyze(entity_insights: Dict[str, list], prompt: str) -> str:
    if not entity_insights:
        return "No entity insights to analyze."
    meta_input = f"Perform a meta-analysis across these different entities related to the prompt: '{prompt}'\n\n"
    for entity, insights in entity_insights.items():
        if insights:
            meta_input += f"\n--- {entity} ---\n" + insights[-1] + "\n"  # Just use the latest insight for each entity
    meta_input += "\nProvide a high-level synthesis that identifies:\n1. Common themes across entities\n2. Important differences\n3. How these entities interact or influence each other\n4. The broader implications for the original research question"
    meta_output = hf_inference(MAIN_LLM_ENDPOINT, meta_input)
    if isinstance(meta_output, dict) and "generated_text" in meta_output:
        return meta_output["generated_text"].strip()
    logger.error(f"Failed to perform meta-analysis: {meta_output}")
    return "Could not generate a meta-analysis due to an error."
# Tool registry: maps tool names to functions and parameter schemas for the LLM
tools = {
    "search_web": {
        "function": tool_search_web,
        "description": "Searches the web for information.",
        "parameters": {
            "query": {"type": "string", "description": "The search query."},
            "num_results": {"type": "integer", "description": "Number of results to return."},
            "time_filter": {"type": "string", "description": "Optional time filter (d, w, m, y)."},
            "region": {"type": "string", "description": "Optional region code."},
            "language": {"type": "string", "description": "Optional language code."}
        },
    },
    "reason": {
        "function": tool_reason,
        "description": "Analyzes and reasons about information.",
        "parameters": {
            "prompt": {"type": "string", "description": "The original prompt."},
            "search_results": {"type": "array", "description": "Search results to analyze."},
            "reasoning_context": {"type": "array", "description": "Previous reasoning outputs."},
            "critique": {"type": "string", "description": "Recent critique to address."},
            "focus_areas": {"type": "array", "description": "Specific aspects to focus on."}
        },
    },
    "summarize": {
        "function": tool_summarize,
        "description": "Synthesizes insights into a cohesive summary.",
        "parameters": {
            "insights": {"type": "array", "description": "Insights to summarize."},
            "prompt": {"type": "string", "description": "The original research prompt."},
            "contradictions": {"type": "array", "description": "Specific contradictions to address."}
        },
    },
    "generate_search_query": {
        "function": tool_generate_search_query,
        "description": "Generates an optimized search query.",
        "parameters": {
            "prompt": {"type": "string", "description": "The original user prompt."},
            "previous_queries": {"type": "array", "description": "Previously used search queries."},
            "failed_queries": {"type": "array", "description": "Queries that didn't yield good results."},
            "focus_areas": {"type": "array", "description": "Specific aspects to focus on."}
        },
    },
    "critique_reasoning": {
        "function": tool_critique_reasoning,
        "description": "Critically evaluates reasoning output.",
        "parameters": {
            "reasoning_output": {"type": "string", "description": "The reasoning output to critique."},
            "prompt": {"type": "string", "description": "The original prompt."},
            "previous_critiques": {"type": "array", "description": "Previous critique outputs."}
        },
    },
    "identify_contradictions": {
        "function": tool_identify_contradictions,
        "description": "Identifies contradictions across multiple insights.",
        "parameters": {
            "insights": {"type": "array", "description": "Collection of insights to analyze for contradictions."},
        },
    },
    "identify_focus_areas": {
        "function": tool_identify_focus_areas,
        "description": "Identifies specific aspects that need further investigation.",
        "parameters": {
            "prompt": {"type": "string", "description": "The original research prompt."},
            "insights": {"type": "array", "description": "Existing insights to build upon."},
            "failed_areas": {"type": "array", "description": "Previously tried areas that yielded poor results."}
        },
    },
    "extract_key_entities": {
        "function": tool_extract_key_entities,
        "description": "Extracts key entities from the prompt for focused research.",
        "parameters": {
            "prompt": {"type": "string", "description": "The original research prompt."}
        },
    },
    "meta_analyze": {
        "function": tool_meta_analyze,
        "description": "Performs meta-analysis across entity-specific insights.",
        "parameters": {
            "entity_insights": {"type": "object", "description": "Dictionary mapping entities to their insights."},
            "prompt": {"type": "string", "description": "The original research prompt."}
        },
    }
}
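# Dispatch sketch: this is how deep_research() below invokes a tool chosen by the LLM
# (hypothetical values):
#   call = {"tool": "search_web", "parameters": {"query": "urban tree survival factors"}}
#   result = tools[call["tool"]]["function"](**call["parameters"])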
def create_prompt(task_description, user_input, available_tools, context):
    prompt = f"""{task_description}

User Input:
{user_input}

Available Tools:
"""
    for tool_name, tool_data in available_tools.items():
        prompt += f"- {tool_name}: {tool_data['description']}\n"
        prompt += "  Parameters:\n"
        for param_name, param_data in tool_data["parameters"].items():
            prompt += f"    - {param_name} ({param_data['type']}): {param_data['description']}\n"
    # Only include the most recent context items to avoid exceeding context limits
    recent_context = context[-MAX_CONTEXT_ITEMS:] if len(context) > MAX_CONTEXT_ITEMS else context
    prompt += "\nContext (most recent items):\n"
    for item in recent_context:
        prompt += f"- {item}\n"
    prompt += """
Instructions:
Select the BEST tool and parameters for the current research stage. Output valid JSON. If no tool is appropriate, respond with {}.
Only use provided tools. Be strategic about which tool to use next based on the research progress so far.
Example:
{"tool": "search_web", "parameters": {"query": "Eiffel Tower location"}}
Output:
"""
    return prompt
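# Controller sketch (names from this file): the planning step in deep_research() is roughly
#   plan = hf_inference(MAIN_LLM_ENDPOINT, create_prompt(task_description, prompt, tools, context))
#   action = json.loads(plan["generated_text"])  # expected: {"tool": ..., "parameters": {...}}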
def deep_research(prompt):
    """Run the iterative multi-stage research loop and return a formatted report string."""
    task_description = "You are an advanced research assistant that can perform deep, multi-stage analysis. Use available tools iteratively, focus on different aspects, follow promising leads, and critically evaluate your findings."
    context = []
    all_insights = []
    entity_specific_insights = {}
    intermediate_output = ""
    previous_queries = []
    failed_queries = []
    reasoning_context = []
    previous_critiques = []
    focus_areas = []
    failed_areas = []
    seen_snippets = set()
    contradictions = []
    research_session_id = str(uuid4())

    # Start with entity extraction for multi-pronged research
    key_entities = tool_extract_key_entities(prompt=prompt)
    if key_entities:
        context.append(f"Identified key entities: {key_entities}")
        intermediate_output += f"Identified key entities for focused research: {key_entities}\n"

    # Track progress per entity, plus a 'general' track for research not tied to a
    # specific entity; initialized unconditionally so later lookups cannot fail
    # when no entities were extracted
    entity_progress = {entity: {'queries': [], 'insights': []} for entity in key_entities}
    entity_progress['general'] = {'queries': [], 'insights': []}
    for i in range(MAX_ITERATIONS):
        # Decide which entity to focus on this iteration, or general research
        if key_entities and i > 0:
            # Simple round-robin over the entities plus a periodic 'general' pass
            entities_to_process = key_entities + ['general']
            current_entity = entities_to_process[i % len(entities_to_process)]
        else:
            current_entity = 'general'
        context.append(f"Current focus: {current_entity}")

        # First iteration: general query and initial research
        if i == 0:
            initial_query = tool_generate_search_query(prompt=prompt)
            if initial_query:
                previous_queries.append(initial_query)
                entity_progress['general']['queries'].append(initial_query)
                search_results = tool_search_web(query=initial_query)
                filtered_search_results = filter_results(search_results, prompt)
                for result in filtered_search_results:
                    seen_snippets.add(result['snippet'])
                if filtered_search_results:
                    context.append(f"Initial Search Results: {len(filtered_search_results)} items found")
                    reasoning_output = tool_reason(prompt, filtered_search_results)
                    if reasoning_output:
                        all_insights.append(reasoning_output)
                        entity_progress['general']['insights'].append(reasoning_output)
                        reasoning_context.append(reasoning_output)
                        context.append(f"Initial Reasoning: {reasoning_output[:200]}...")
                else:
                    failed_queries.append(initial_query)
                    context.append(f"Initial query yielded no relevant results: {initial_query}")
        # Generate an entity-specific query if applicable
        elif current_entity != 'general':
            entity_query = tool_generate_search_query(
                prompt=f"{prompt} focusing specifically on {current_entity}",
                previous_queries=entity_progress[current_entity]['queries'],
                focus_areas=focus_areas
            )
            if entity_query:
                previous_queries.append(entity_query)
                entity_progress[current_entity]['queries'].append(entity_query)
                # Search with entity focus
                search_results = tool_search_web(query=entity_query)
                filtered_search_results = filter_results(search_results,
                                                         f"{prompt} {current_entity}",
                                                         previous_snippets=seen_snippets)
                # Update seen snippets
                for result in filtered_search_results:
                    seen_snippets.add(result['snippet'])
                if filtered_search_results:
                    context.append(f"Entity Search for {current_entity}: {len(filtered_search_results)} results")
                    # Get entity-specific reasoning
                    entity_reasoning = tool_reason(
                        prompt=f"{prompt} focusing on {current_entity}",
                        search_results=filtered_search_results,
                        reasoning_context=entity_progress[current_entity]['insights'],
                        focus_areas=focus_areas
                    )
                    if entity_reasoning:
                        all_insights.append(entity_reasoning)
                        entity_progress[current_entity]['insights'].append(entity_reasoning)
                        # Store in the entity-specific insights dictionary for meta-analysis
                        entity_specific_insights.setdefault(current_entity, []).append(entity_reasoning)
                        context.append(f"Reasoning about {current_entity}: {entity_reasoning[:200]}...")
                else:
                    failed_queries.append(entity_query)
                    context.append(f"Entity query for {current_entity} yielded no relevant results")
        # Ask the LLM which tool to use next
        llm_prompt = create_prompt(task_description, prompt, tools, context)
        llm_response = hf_inference(MAIN_LLM_ENDPOINT, llm_prompt)
        if isinstance(llm_response, dict) and "error" in llm_response:
            intermediate_output += f"LLM Error: {llm_response['error']}\n"
            continue
        if not isinstance(llm_response, dict) or "generated_text" not in llm_response:
            intermediate_output += "Error: Invalid LLM response.\n"
            continue
        try:
            response_text = llm_response["generated_text"].strip()
            response_json = json.loads(response_text)
            intermediate_output += f"Iteration {i+1} - Focus: {current_entity} - Action: {response_text}\n"
        except json.JSONDecodeError:
            intermediate_output += f"Iteration {i+1} - LLM Response (Invalid JSON): {llm_response['generated_text'][:100]}...\n"
            context.append(f"Invalid JSON: {llm_response['generated_text'][:100]}...")
            continue

        tool_name = response_json.get("tool")
        parameters = response_json.get("parameters", {})
        if not tool_name:
            # No tool selected: if we already have insights, treat this as a signal
            # we may be done, but only consider ending early after half the iterations
            if all_insights and i > MAX_ITERATIONS // 2:
                break
            continue
        if tool_name not in tools:
            context.append(f"Invalid tool: {tool_name}")
            intermediate_output += f"Iteration {i + 1} - Invalid tool chosen: {tool_name}\n"
            continue
        tool = tools[tool_name]
        try:
            intermediate_output += f"Iteration {i+1} - Executing: {tool_name}, Key params: {str(parameters)[:100]}...\n"

            if tool_name == "generate_search_query":
                parameters['previous_queries'] = previous_queries
                parameters['failed_queries'] = failed_queries
                parameters['focus_areas'] = focus_areas
                result = tool["function"](**parameters)
                if current_entity != 'general':
                    entity_progress[current_entity]['queries'].append(result)
                previous_queries.append(result)

            elif tool_name == "reason":
                if current_entity != 'general' and 'reasoning_context' not in parameters:
                    parameters['reasoning_context'] = entity_progress[current_entity]['insights']
                elif 'reasoning_context' not in parameters:
                    parameters['reasoning_context'] = reasoning_context[:]
                if 'prompt' not in parameters:
                    if current_entity != 'general':
                        parameters['prompt'] = f"{prompt} focusing on {current_entity}"
                    else:
                        parameters['prompt'] = prompt
                if 'search_results' not in parameters:
                    parameters['search_results'] = []
                if 'focus_areas' not in parameters and focus_areas:
                    parameters['focus_areas'] = focus_areas
                result = tool["function"](**parameters)
                if current_entity != 'general':
                    entity_progress[current_entity]['insights'].append(result)
                    entity_specific_insights.setdefault(current_entity, []).append(result)
                else:
                    reasoning_context.append(result)
                all_insights.append(result)

            elif tool_name == "search_web":
                result = tool_search_web(**parameters)
                filtered_result = filter_results(result,
                                                 prompt if current_entity == 'general' else f"{prompt} {current_entity}",
                                                 previous_snippets=seen_snippets)
                # Update seen snippets
                for r in filtered_result:
                    seen_snippets.add(r['snippet'])
                result = filtered_result
                if not result:
                    query = parameters.get('query', '')
                    if query:
                        failed_queries.append(query)

            elif tool_name == "critique_reasoning":
                if 'previous_critiques' not in parameters:
                    parameters['previous_critiques'] = previous_critiques
                if all_insights:
                    if 'reasoning_output' not in parameters:
                        parameters['reasoning_output'] = all_insights[-1]
                    if 'prompt' not in parameters:
                        parameters['prompt'] = prompt
                    result = tool["function"](**parameters)
                    previous_critiques.append(result)
                    context.append(f"Critique: {result[:200]}...")
                else:
                    result = "No reasoning to critique yet."

            elif tool_name == "identify_contradictions":
                result = tool["function"](**parameters)
                if result:
                    contradictions = result  # Store for later use in summarization
                    context.append(f"Identified contradictions: {result}")

            elif tool_name == "identify_focus_areas":
                if 'failed_areas' not in parameters:
                    parameters['failed_areas'] = failed_areas
                result = tool["function"](**parameters)
                if result:
                    # Update focus areas, but keep track of the ones that didn't yield results
                    old_focus = set(focus_areas)
                    focus_areas = result
                    failed_areas.extend([area for area in old_focus if area not in result])
                    context.append(f"New focus areas: {result}")

            elif tool_name == "meta_analyze":
                if 'entity_insights' not in parameters:
                    parameters['entity_insights'] = entity_specific_insights
                if 'prompt' not in parameters:
                    parameters['prompt'] = prompt
                result = tool["function"](**parameters)
                if result:
                    all_insights.append(result)  # Add the meta-analysis to the insights
                    context.append(f"Meta-analysis across entities: {result[:200]}...")

            else:
                result = tool["function"](**parameters)

            # Truncate very long results for the intermediate output
            result_str = str(result)
            if len(result_str) > 500:
                result_str = result_str[:500] + "..."
            intermediate_output += f"Iteration {i+1} - Result: {result_str}\n"

            # Add a truncated result to the context
            result_context = result_str
            if len(result_str) > 300:  # Even shorter for context
                result_context = result_str[:300] + "..."
            context.append(f"Used: {tool_name}, Result: {result_context}")
        except Exception as e:
            logger.error(f"Error with {tool_name}: {str(e)}")
            context.append(f"Error with {tool_name}: {str(e)}")
            intermediate_output += f"Iteration {i+1} - Error: {str(e)}\n"
            continue
    # Perform a final meta-analysis if we have entity-specific insights
    if len(entity_specific_insights) > 1 and len(all_insights) > 2:
        meta_analysis = tool_meta_analyze(entity_insights=entity_specific_insights, prompt=prompt)
        if meta_analysis:
            all_insights.append(meta_analysis)
            intermediate_output += f"Final Meta-Analysis: {meta_analysis[:500]}...\n"

    # Generate the final summary
    if all_insights:
        final_result = tool_summarize(all_insights, prompt, contradictions)
    else:
        final_result = "Could not find meaningful information despite multiple attempts."

    # Prepare the full output with detailed tracking
    full_output = f"**Research Prompt:** {prompt}\n\n"
    if key_entities:
        full_output += f"**Key Entities Identified:** {', '.join(key_entities)}\n\n"
    full_output += "**Research Process:**\n" + intermediate_output + "\n"
    if contradictions:
        full_output += "**Contradictions Identified:**\n"
        # 'idx' avoids shadowing the main loop variable i, which is reported below
        for idx, contradiction in enumerate(contradictions, 1):
            full_output += f"{idx}. {contradiction}\n"
        full_output += "\n"
    full_output += f"**Final Analysis:**\n{final_result}\n\n"

    # Add session info for potential follow-up
    full_output += f"Research Session ID: {research_session_id}\n"
    full_output += f"Completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
    full_output += f"Total iterations: {i+1}\n"
    full_output += f"Total insights generated: {len(all_insights)}\n"
    return full_output
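# Standalone usage sketch (bypasses the UI; assumes HF_API_KEY and the endpoint URLs
# above are configured):
#   report = deep_research("What drives urban tree mortality in dense cities?")
#   print(report)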
# CSS for a more professional look
custom_css = """
.gradio-container {
    background-color: #f7f9fc;
}
.output-box {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    line-height: 1.5;
}
h3 {
    color: #2c3e50;
    font-weight: 600;
}
.footer {
    text-align: center;
    margin-top: 20px;
    color: #7f8c8d;
    font-size: 0.9em;
}
"""
# Footer with additional information, rendered below the interface via the
# `article` parameter (gr.Interface has no add_html method)
footer_html = """
<div class="footer">
    <p>This research assistant performs advanced multi-stage analysis using natural language processing and web search.</p>
    <p>Results should be verified with additional sources. Not suitable for medical, legal, or emergency use.</p>
</div>
"""

# Create the Gradio interface with an enhanced UI
iface = gr.Interface(
    fn=deep_research,
    inputs=[
        gr.Textbox(lines=5, placeholder="Enter your research question...", label="Research Question")
    ],
    outputs=gr.Textbox(lines=30, placeholder="Research results will appear here...", label="Research Results", elem_classes=["output-box"]),
    title="Advanced Multi-Stage Research Assistant",
    description="""This tool performs deep, multi-faceted research by:
1. Breaking down complex topics into key entities and aspects
2. Iteratively searching, reasoning, and critiquing findings
3. Exploring different perspectives and addressing contradictions
4. Synthesizing insights across multiple information sources""",
    article=footer_html,
    examples=[
        ["What are the key factors affecting urban tree survival and how do they vary between developing and developed countries?"],
        ["Compare and contrast the economic policies of China and the United States over the past two decades, analyzing their impacts on global trade."],
        ["What are the most promising approaches to quantum computing and what are their respective advantages and limitations?"],
        ["Analyze the environmental and social impacts of lithium mining for electric vehicle batteries."],
        ["How has artificial intelligence influenced medical diagnostics in the past five years, and what are the ethical considerations?"]
    ],
    theme="default",  # gr.themes.Base() is more explicit, but "default" also works
    css=custom_css,
    allow_flagging="never",  # allow_flagging expects a string mode, not a boolean
    analytics_enabled=False,
)

# Launch the interface
iface.launch(share=False)