Reality123b committed · verified
Commit afd36d8 · Parent: 59a7838

Update app.py

Files changed (1): app.py (+117, −116)

app.py CHANGED
@@ -6,7 +6,7 @@ import json
 import re
 from uuid import uuid4
 from datetime import datetime
-from duckduckgo_search import ddg
+from duckduckgo_search import DDGS  # Corrected import
 from sentence_transformers import SentenceTransformer, util
 from typing import List, Dict, Any, Optional, Union, Tuple
 import logging
@@ -24,7 +24,7 @@ if not HF_API_KEY:
     raise ValueError("Please set the HF_API_KEY environment variable.")
 
 # You can use different models for different tasks
-MAIN_LLM_ENDPOINT = "your-main-llm-endpoint"
+MAIN_LLM_ENDPOINT = "your-main-llm-endpoint"  # Replace with your actual endpoint
 REASONING_LLM_ENDPOINT = "your-reasoning-llm-endpoint"  # Can be the same as main if needed
 CRITIC_LLM_ENDPOINT = "your-critic-llm-endpoint"  # Can be the same as main if needed
 
@@ -48,7 +48,7 @@ except Exception as e:
 def hf_inference(endpoint, inputs, parameters=None, retries=5):
     headers = {"Authorization": f"Bearer {HF_API_KEY}"}
     payload = {"inputs": inputs, "parameters": parameters or {}}
-
+
     for attempt in range(retries):
         try:
             response = requests.post(endpoint, headers=headers, json=payload, timeout=TIMEOUT)
@@ -61,40 +61,41 @@ def hf_inference(endpoint, inputs, parameters=None, retries=5):
             time.sleep(RETRY_DELAY * (1 + attempt))  # Exponential backoff
     return {"error": "Request failed after multiple retries."}
 
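One small note on the retry loop above: RETRY_DELAY * (1 + attempt) grows linearly, despite the "# Exponential backoff" comment. A minimal sketch of a genuinely exponential, capped delay follows; RETRY_DELAY mirrors the app's constant, while the cap and jitter are assumptions added here:

import random

RETRY_DELAY = 2.0  # assumed base delay in seconds; the app defines its own value

def backoff_delay(attempt: int, base: float = RETRY_DELAY, cap: float = 60.0) -> float:
    # Exponential growth (base * 2^attempt), capped, with full jitter.
    return random.uniform(0, min(cap, base * (2 ** attempt)))

for attempt in range(5):
    # In the app this would feed time.sleep(backoff_delay(attempt)).
    print(f"attempt {attempt}: delay up to {min(60.0, RETRY_DELAY * 2 ** attempt):.0f}s")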
-def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str = "moderate",
+def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str = "moderate",
                     time_filter: str = "", region: str = "wt-wt", language: str = "en-us") -> list:
     try:
-        results = ddg(query, max_results=num_results, safesearch=safesearch,
-                      time=time_filter, region=region, language=language)
-        if results:
-            return [{"title": r["title"], "snippet": r["snippet"], "url": r["href"]} for r in results]
-        else:
-            return []
+        with DDGS() as ddgs:  # Use the DDGS context manager
+            results = [r for r in ddgs.text(query, max_results=num_results, safesearch=safesearch,
+                                            time=time_filter, region=region, hreflang=language)]  # Simplified call
+            if results:
+                return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
+            else:
+                return []
     except Exception as e:
         logger.error(f"DuckDuckGo search error: {e}")
         return []
 
-def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
+def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
                 critique: str = "", focus_areas: list = []) -> str:
     if not search_results:
         return "No search results to reason about."
-
+
     reasoning_input = "Reason about the following search results in relation to the prompt:\n\n"
     reasoning_input += f"Prompt: {prompt}\n\n"
-
+
     if focus_areas:
         reasoning_input += f"Focus particularly on these aspects: {', '.join(focus_areas)}\n\n"
-
+
     for i, result in enumerate(search_results):
         reasoning_input += f"- Result {i + 1}: Title: {result['title']}, Snippet: {result['snippet']}\n"
-
+
     if reasoning_context:
         recent_context = reasoning_context[-MAX_HISTORY_ITEMS:]
         reasoning_input += "\nPrevious Reasoning Context:\n" + "\n".join(recent_context)
-
+
     if critique:
         reasoning_input += f"\n\nRecent critique to address: {critique}\n"
-
+
     reasoning_input += "\nProvide a thorough, nuanced analysis that builds upon previous reasoning if applicable. Consider multiple perspectives and potential contradictions in the search results."
 
     reasoning_output = hf_inference(REASONING_LLM_ENDPOINT, reasoning_input)
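A note on the rewritten tool_search_web above: recent duckduckgo_search releases spell the time filter timelimit rather than time, and DDGS.text() has no hreflang keyword, so whether the committed call runs depends on the installed version. A hedged sketch against the documented API (parameter names per duckduckgo_search v4+; treat exact signatures as version-dependent):

from duckduckgo_search import DDGS

def search_web_sketch(query: str, num_results: int = 5) -> list:
    # DDGS.text yields dicts with "title", "body", and "href" keys.
    with DDGS() as ddgs:
        results = ddgs.text(query, region="wt-wt", safesearch="moderate",
                            timelimit=None, max_results=num_results)
        return [{"title": r["title"], "snippet": r["body"], "url": r["href"]}
                for r in (results or [])]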
@@ -108,15 +109,15 @@ def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
 def tool_summarize(insights: list, prompt: str, contradictions: list = []) -> str:
     if not insights:
         return "No insights to summarize."
-
+
     summarization_input = f"Synthesize the following insights into a cohesive and comprehensive summary regarding: '{prompt}'\n\n"
     summarization_input += "\n\n".join(insights[-MAX_HISTORY_ITEMS:])  # Only use most recent insights
-
+
     if contradictions:
         summarization_input += "\n\nAddress these specific contradictions:\n" + "\n".join(contradictions)
-
+
     summarization_input += "\n\nProvide a well-structured summary that:\n1. Presents the main findings\n2. Acknowledges limitations and uncertainties\n3. Highlights areas of consensus and disagreement\n4. Suggests potential directions for further inquiry"
-
+
     summarization_output = hf_inference(MAIN_LLM_ENDPOINT, summarization_input)
 
     if isinstance(summarization_output, dict) and "generated_text" in summarization_output:
@@ -125,120 +126,120 @@ def tool_summarize(insights: list, prompt: str, contradictions: list = []) -> str:
     logger.error(f"Failed to generate summary: {summarization_output}")
     return "Could not generate a summary due to an error."
 
-def tool_generate_search_query(prompt: str, previous_queries: list = [],
+def tool_generate_search_query(prompt: str, previous_queries: list = [],
                                failed_queries: list = [], focus_areas: list = []) -> str:
     query_gen_input = f"Generate an effective search query for the following prompt: {prompt}\n"
-
+
     if previous_queries:
         recent_queries = previous_queries[-MAX_HISTORY_ITEMS:]
         query_gen_input += "Previous search queries:\n" + "\n".join(recent_queries) + "\n"
-
+
     if failed_queries:
         query_gen_input += "These queries didn't yield useful results:\n" + "\n".join(failed_queries) + "\n"
-
+
     if focus_areas:
         query_gen_input += f"Focus particularly on these aspects: {', '.join(focus_areas)}\n"
-
+
     query_gen_input += "Refine the search query based on previous queries, aiming for more precise results.\n"
     query_gen_input += "Search Query:"
-
+
     query_gen_output = hf_inference(MAIN_LLM_ENDPOINT, query_gen_input)
 
     if isinstance(query_gen_output, dict) and 'generated_text' in query_gen_output:
         return query_gen_output['generated_text'].strip()
-
+
     logger.error(f"Failed to generate search query: {query_gen_output}")
     return ""
 
-def tool_critique_reasoning(reasoning_output: str, prompt: str,
+def tool_critique_reasoning(reasoning_output: str, prompt: str,
                             previous_critiques: list = []) -> str:
     critique_input = f"Critically evaluate the following reasoning output in relation to the prompt:\n\nPrompt: {prompt}\n\nReasoning: {reasoning_output}\n\n"
-
+
     if previous_critiques:
         critique_input += "Previous critiques that should be addressed:\n" + "\n".join(previous_critiques[-MAX_HISTORY_ITEMS:]) + "\n\n"
-
+
     critique_input += "Identify any flaws, biases, logical fallacies, unsupported claims, or areas for improvement. Be specific and constructive. Suggest concrete ways to enhance the reasoning."
-
+
     critique_output = hf_inference(CRITIC_LLM_ENDPOINT, critique_input)
-
+
     if isinstance(critique_output, dict) and "generated_text" in critique_output:
         return critique_output["generated_text"].strip()
-
+
     logger.error(f"Failed to generate critique: {critique_output}")
     return "Could not generate a critique due to an error."
 
 def tool_identify_contradictions(insights: list) -> list:
     if len(insights) < 2:
         return []
-
+
     contradiction_input = "Identify specific contradictions in these insights:\n\n" + "\n\n".join(insights[-MAX_HISTORY_ITEMS:])
     contradiction_input += "\n\nList each contradiction as a separate numbered point. If no contradictions exist, respond with 'No contradictions found.'"
-
+
     contradiction_output = hf_inference(CRITIC_LLM_ENDPOINT, contradiction_input)
-
+
     if isinstance(contradiction_output, dict) and "generated_text" in contradiction_output:
         result = contradiction_output["generated_text"].strip()
         if result == "No contradictions found.":
             return []
-
+
         # Extract numbered contradictions
         contradictions = re.findall(r'\d+\.\s+(.*?)(?=\d+\.|$)', result, re.DOTALL)
         return [c.strip() for c in contradictions if c.strip()]
-
+
     logger.error(f"Failed to identify contradictions: {contradiction_output}")
     return []
 
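The numbered-point extraction above leans on a lazy capture plus a lookahead; a quick self-contained check of how it splits a model reply (the sample text is illustrative only):

import re

reply = "1. Source A reports demand rose.\n2. Source B reports demand fell."
points = re.findall(r'\d+\.\s+(.*?)(?=\d+\.|$)', reply, re.DOTALL)
print([p.strip() for p in points if p.strip()])
# -> ['Source A reports demand rose.', 'Source B reports demand fell.']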
-def tool_identify_focus_areas(prompt: str, insights: list = [],
+def tool_identify_focus_areas(prompt: str, insights: list = [],
                               failed_areas: list = []) -> list:
     focus_input = f"Based on this research prompt: '{prompt}'\n\n"
-
+
     if insights:
         focus_input += "And these existing insights:\n" + "\n".join(insights[-3:]) + "\n\n"  # Last 3 insights
-
+
     if failed_areas:
         focus_input += f"These focus areas didn't yield useful results: {', '.join(failed_areas)}\n\n"
-
+
     focus_input += "Identify 2-3 specific aspects that should be investigated further to get a complete understanding. Be precise and prioritize underexplored areas."
-
+
     focus_output = hf_inference(MAIN_LLM_ENDPOINT, focus_input)
-
+
     if isinstance(focus_output, dict) and "generated_text" in focus_output:
         result = focus_output["generated_text"].strip()
         # Extract areas, assuming they're listed with numbers, bullets, or in separate lines
         areas = re.findall(r'(?:^|\n)(?:\d+\.|\*|\-)\s*(.*?)(?=(?:\n(?:\d+\.|\*|\-|$))|$)', result)
         return [area.strip() for area in areas if area.strip()][:3]  # Limit to top 3
-
+
     logger.error(f"Failed to identify focus areas: {focus_output}")
     return []
 
 def filter_results(search_results, prompt, previous_snippets=None):
     if not main_similarity_model or not search_results:
         return search_results
-
+
     try:
         prompt_embedding = main_similarity_model.encode(prompt, convert_to_tensor=True)
         filtered_results = []
-
+
         # Keep track of snippets we've already seen
         seen_snippets = set()
         if previous_snippets:
             seen_snippets.update(previous_snippets)
-
+
         for result in search_results:
             combined_text = result['title'] + " " + result['snippet']
-
+
             # Skip if we've seen this exact snippet before
             if result['snippet'] in seen_snippets:
                 continue
-
+
             result_embedding = main_similarity_model.encode(combined_text, convert_to_tensor=True)
             cosine_score = util.pytorch_cos_sim(prompt_embedding, result_embedding)[0][0].item()
-
+
             if cosine_score >= SIMILARITY_THRESHOLD:
                 result['relevance_score'] = cosine_score
                 filtered_results.append(result)
                 seen_snippets.add(result['snippet'])
-
+
         # Sort by relevance score
         filtered_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
         return filtered_results
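filter_results keeps a result only when its embedding's cosine similarity to the prompt clears SIMILARITY_THRESHOLD. A self-contained sketch of that scoring step (the model name and the 0.5 threshold are illustrative, not the app's actual config):

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # illustrative model choice
prompt_emb = model.encode("impacts of lithium mining", convert_to_tensor=True)
doc_emb = model.encode("Environmental effects of lithium extraction", convert_to_tensor=True)
score = util.pytorch_cos_sim(prompt_emb, doc_emb)[0][0].item()
print(f"cosine={score:.2f}, keep={score >= 0.5}")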
@@ -250,15 +251,15 @@ def filter_results(search_results, prompt, previous_snippets=None):
 # New tool: Extract entities for focused research
 def tool_extract_key_entities(prompt: str) -> list:
     entity_input = f"Extract the key entities (people, organizations, concepts, technologies, etc.) from this research prompt that should be investigated individually:\n\n{prompt}\n\nList only the most important 3-5 entities, one per line."
-
+
     entity_output = hf_inference(MAIN_LLM_ENDPOINT, entity_input)
-
+
     if isinstance(entity_output, dict) and "generated_text" in entity_output:
         result = entity_output["generated_text"].strip()
         # Split by lines and clean up
         entities = [e.strip() for e in result.split('\n') if e.strip()]
         return entities[:5]  # Limit to 5 entities
-
+
     logger.error(f"Failed to extract key entities: {entity_output}")
     return []
 
@@ -266,20 +267,20 @@ def tool_extract_key_entities(prompt: str) -> list:
 def tool_meta_analyze(entity_insights: Dict[str, list], prompt: str) -> str:
     if not entity_insights:
         return "No entity insights to analyze."
-
+
     meta_input = f"Perform a meta-analysis across these different entities related to the prompt: '{prompt}'\n\n"
-
+
     for entity, insights in entity_insights.items():
         if insights:
             meta_input += f"\n--- {entity} ---\n" + insights[-1] + "\n"  # Just use the latest insight for each entity
-
+
     meta_input += "\nProvide a high-level synthesis that identifies:\n1. Common themes across entities\n2. Important differences\n3. How these entities interact or influence each other\n4. The broader implications for the original research question"
-
+
     meta_output = hf_inference(MAIN_LLM_ENDPOINT, meta_input)
-
+
     if isinstance(meta_output, dict) and "generated_text" in meta_output:
         return meta_output["generated_text"].strip()
-
+
     logger.error(f"Failed to perform meta-analysis: {meta_output}")
     return "Could not generate a meta-analysis due to an error."
 
@@ -384,7 +385,7 @@ Available Tools:
 
     # Only include most recent context items to avoid exceeding context limits
     recent_context = context[-MAX_CONTEXT_ITEMS:] if len(context) > MAX_CONTEXT_ITEMS else context
-
+
     prompt += "\nContext (most recent items):\n"
     for item in recent_context:
         prompt += f"- {item}\n"
@@ -416,17 +417,17 @@ def deep_research(prompt):
     seen_snippets = set()
     contradictions = []
     research_session_id = str(uuid4())
-
+
     # Start with entity extraction for multi-pronged research
     key_entities = tool_extract_key_entities(prompt=prompt)
     if key_entities:
         context.append(f"Identified key entities: {key_entities}")
         intermediate_output += f"Identified key entities for focused research: {key_entities}\n"
-
+
     # Tracking progress for each entity
     entity_progress = {entity: {'queries': [], 'insights': []} for entity in key_entities}
     entity_progress['general'] = {'queries': [], 'insights': []}  # For general research not tied to specific entities
-
+
     for i in range(MAX_ITERATIONS):
         # Decide which entity to focus on this iteration, or general research
         if key_entities and i > 0:
@@ -435,9 +436,9 @@ def deep_research(prompt):
             current_entity = entities_to_process[i % len(entities_to_process)]
         else:
             current_entity = 'general'
-
+
         context.append(f"Current focus: {current_entity}")
-
+
         # First iteration: general query and initial research
         if i == 0:
             initial_query = tool_generate_search_query(prompt=prompt)
@@ -446,10 +447,10 @@ def deep_research(prompt):
                 entity_progress['general']['queries'].append(initial_query)
                 search_results = tool_search_web(query=initial_query)
                 filtered_search_results = filter_results(search_results, prompt)
-
+
                 for result in filtered_search_results:
                     seen_snippets.add(result['snippet'])
-
+
                 if filtered_search_results:
                     context.append(f"Initial Search Results: {len(filtered_search_results)} items found")
                     reasoning_output = tool_reason(prompt, filtered_search_results)
@@ -461,7 +462,7 @@ def deep_research(prompt):
             else:
                 failed_queries.append(initial_query)
                 context.append(f"Initial query yielded no relevant results: {initial_query}")
-
+
         # Generate current entity-specific query if applicable
         elif current_entity != 'general':
             entity_query = tool_generate_search_query(
@@ -469,24 +470,24 @@ def deep_research(prompt):
                 previous_queries=entity_progress[current_entity]['queries'],
                 focus_areas=focus_areas
             )
-
+
             if entity_query:
                 previous_queries.append(entity_query)
                 entity_progress[current_entity]['queries'].append(entity_query)
-
+
                 # Search with entity focus
                 search_results = tool_search_web(query=entity_query)
-                filtered_search_results = filter_results(search_results,
+                filtered_search_results = filter_results(search_results,
                                                          f"{prompt} {current_entity}",
                                                          previous_snippets=seen_snippets)
-
+
                 # Update seen snippets
                 for result in filtered_search_results:
                     seen_snippets.add(result['snippet'])
-
+
                 if filtered_search_results:
                     context.append(f"Entity Search for {current_entity}: {len(filtered_search_results)} results")
-
+
                     # Get entity-specific reasoning
                     entity_reasoning = tool_reason(
                         prompt=f"{prompt} focusing on {current_entity}",
@@ -494,29 +495,29 @@ def deep_research(prompt):
                         reasoning_context=entity_progress[current_entity]['insights'],
                         focus_areas=focus_areas
                     )
-
+
                     if entity_reasoning:
                         all_insights.append(entity_reasoning)
                         entity_progress[current_entity]['insights'].append(entity_reasoning)
-
+
                         # Store in entity-specific insights dictionary for meta-analysis
                         if current_entity not in entity_specific_insights:
                             entity_specific_insights[current_entity] = []
                         entity_specific_insights[current_entity].append(entity_reasoning)
-
+
                         context.append(f"Reasoning about {current_entity}: {entity_reasoning[:200]}...")
             else:
                 failed_queries.append(entity_query)
                 context.append(f"Entity query for {current_entity} yielded no relevant results")
-
+
         # Generate LLM decision for next tool
         llm_prompt = create_prompt(task_description, prompt, tools, context)
         llm_response = hf_inference(MAIN_LLM_ENDPOINT, llm_prompt)
-
+
         if isinstance(llm_response, dict) and "error" in llm_response:
             intermediate_output += f"LLM Error: {llm_response['error']}\n"
             continue
-
+
         if not isinstance(llm_response, dict) or "generated_text" not in llm_response:
             intermediate_output += "Error: Invalid LLM response.\n"
             continue
@@ -554,32 +555,32 @@ def deep_research(prompt):
                     parameters['failed_queries'] = failed_queries
                     parameters['focus_areas'] = focus_areas
                 result = tool["function"](**parameters)
-
+
                 if current_entity != 'general':
                     entity_progress[current_entity]['queries'].append(result)
-
+
                 previous_queries.append(result)
-
+
             elif tool_name == "reason":
                 if current_entity != 'general' and 'reasoning_context' not in parameters:
                     parameters['reasoning_context'] = entity_progress[current_entity]['insights']
                 elif 'reasoning_context' not in parameters:
                     parameters['reasoning_context'] = reasoning_context[:]
-
+
                 if 'prompt' not in parameters:
                     if current_entity != 'general':
                         parameters['prompt'] = f"{prompt} focusing on {current_entity}"
                     else:
                         parameters['prompt'] = prompt
-
+
                 if 'search_results' not in parameters:
                     parameters['search_results'] = []
-
+
                 if 'focus_areas' not in parameters and focus_areas:
                     parameters['focus_areas'] = focus_areas
-
+
                 result = tool["function"](**parameters)
-
+
                 if current_entity != 'general':
                     entity_progress[current_entity]['insights'].append(result)
                     if current_entity not in entity_specific_insights:
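The branches above all funnel through tool["function"](**parameters), which presumes a registry mapping tool names to callables earlier in the file (not shown in this diff). A minimal self-contained sketch of that shape, with stand-in lambdas where the real app registers tool_search_web, tool_reason, and friends:

from typing import Any, Dict

tools: Dict[str, Dict[str, Any]] = {
    "search_web": {"function": lambda query="": [], "description": "Run a web search"},
    "reason": {"function": lambda prompt="", search_results=(): "analysis...", "description": "Reason over results"},
}

def dispatch(tool_name: str, parameters: Dict[str, Any]) -> Any:
    tool = tools.get(tool_name)
    if tool is None:
        return f"Unknown tool: {tool_name}"
    return tool["function"](**parameters)

print(dispatch("search_web", {"query": "lithium mining"}))  # -> []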
@@ -587,48 +588,48 @@ def deep_research(prompt):
                     entity_specific_insights[current_entity].append(result)
                 else:
                     reasoning_context.append(result)
-
+
                 all_insights.append(result)
-
+
             elif tool_name == "search_web":
                 result = tool_search_web(**parameters)
-                filtered_result = filter_results(result,
+                filtered_result = filter_results(result,
                                                  prompt if current_entity == 'general' else f"{prompt} {current_entity}",
                                                  previous_snippets=seen_snippets)
-
+
                 # Update seen snippets
                 for r in filtered_result:
                     seen_snippets.add(r['snippet'])
-
+
                 result = filtered_result
-
+
                 if not result:
                     query = parameters.get('query', '')
                     if query:
                         failed_queries.append(query)
-
+
             elif tool_name == "critique_reasoning":
                 if 'previous_critiques' not in parameters:
                     parameters['previous_critiques'] = previous_critiques
-
+
                 if all_insights:
                     if 'reasoning_output' not in parameters:
                         parameters['reasoning_output'] = all_insights[-1]
                     if 'prompt' not in parameters:
                         parameters['prompt'] = prompt
-
+
                     result = tool["function"](**parameters)
                     previous_critiques.append(result)
                     context.append(f"Critique: {result[:200]}...")
                 else:
                     result = "No reasoning to critique yet."
-
+
             elif tool_name == "identify_contradictions":
                 result = tool["function"](**parameters)
                 if result:
                     contradictions = result  # Store for later use in summarization
                     context.append(f"Identified contradictions: {result}")
-
+
             elif tool_name == "identify_focus_areas":
                 if 'failed_areas' not in parameters:
                     parameters['failed_areas'] = failed_areas
@@ -639,7 +640,7 @@ def deep_research(prompt):
                     focus_areas = result
                     failed_areas.extend([area for area in old_focus if area not in result])
                     context.append(f"New focus areas: {result}")
-
+
             elif tool_name == "meta_analyze":
                 if 'entity_insights' not in parameters:
                     parameters['entity_insights'] = entity_specific_insights
@@ -649,7 +650,7 @@ def deep_research(prompt):
                 if result:
                     all_insights.append(result)  # Add meta-analysis to insights
                     context.append(f"Meta-analysis across entities: {result[:200]}...")
-
+
             else:
                 result = tool["function"](**parameters)
 
@@ -657,9 +658,9 @@ def deep_research(prompt):
             result_str = str(result)
             if len(result_str) > 500:
                 result_str = result_str[:500] + "..."
-
+
             intermediate_output += f"Iteration {i+1} - Result: {result_str}\n"
-
+
             # Add truncated result to context
             result_context = result_str
             if len(result_str) > 300:  # Even shorter for context
@@ -669,8 +670,7 @@ def deep_research(prompt):
         except Exception as e:
             logger.error(f"Error with {tool_name}: {str(e)}")
             context.append(f"Error with {tool_name}: {str(e)}")
-            intermediate_output += f"Iteration {i+1} - Error: {str(e)}\n"
-            continue
+            intermediate_output += f"Iteration {i+1} - Error: {str(e)}\n"
 
     # Perform final meta-analysis if we have entity-specific insights
     if len(entity_specific_insights) > 1 and len(all_insights) > 2:
@@ -687,26 +687,26 @@ def deep_research(prompt):
 
     # Prepare the full output with detailed tracking
     full_output = f"**Research Prompt:** {prompt}\n\n"
-
+
     if key_entities:
         full_output += f"**Key Entities Identified:** {', '.join(key_entities)}\n\n"
-
+
     full_output += "**Research Process:**\n" + intermediate_output + "\n"
-
+
     if contradictions:
         full_output += "**Contradictions Identified:**\n"
         for i, contradiction in enumerate(contradictions, 1):
             full_output += f"{i}. {contradiction}\n"
         full_output += "\n"
-
+
     full_output += f"**Final Analysis:**\n{final_result}\n\n"
-
+
     # Add session info for potential follow-up
     full_output += f"Research Session ID: {research_session_id}\n"
     full_output += f"Completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
     full_output += f"Total iterations: {i+1}\n"
     full_output += f"Total insights generated: {len(all_insights)}\n"
-
+
     return full_output
 
 # Create CSS for a more professional look
@@ -750,19 +750,20 @@ iface = gr.Interface(
         ["Analyze the environmental and social impacts of lithium mining for electric vehicle batteries."],
         ["How has artificial intelligence influenced medical diagnostics in the past five years, and what are the ethical considerations?"]
     ],
-    theme="default",
+    theme="default",  # gr.themes.Base() is more explicit, but "default" also works
     css=custom_css,
     allow_flagging=False,
     analytics_enabled=False,
 )
 
-# Add footer with additional information
+# Add footer with additional information (Optional, good for context)
 footer_html = """
 <div class="footer">
     <p>This research assistant performs advanced multi-stage analysis using natural language processing and web search.</p>
     <p>Results should be verified with additional sources. Not suitable for medical, legal, or emergency use.</p>
 </div>
 """
+#iface = iface.add_html(footer_html)  # gr.Interface object has no attribute add_html
 
 # Launch the interface
 iface.launch(share=False)
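On the commented-out add_html line: gr.Interface indeed has no such method. One way to actually render footer_html, assuming a Gradio version with Blocks support, is to wrap the existing interface (a sketch, not the committed behavior; iface, custom_css, and footer_html are the objects defined above):

import gradio as gr

with gr.Blocks(css=custom_css) as demo:
    iface.render()        # embed the Interface defined above
    gr.HTML(footer_html)  # footer displayed beneath it

demo.launch(share=False)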
 