Reality123b committed on
Commit
fcf4ade
·
verified ·
1 Parent(s): 48a64eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -86
app.py CHANGED
@@ -17,8 +17,7 @@ import arxiv
17
  import scholarly
18
  import pymed
19
  import wikipedia
20
- #from newspaper3k import Article # Removed newspaper3k
21
- import trafilatura # Import trafilatura
22
  from trafilatura import extract, fetch_url
23
  import pickle
24
  import faiss
@@ -38,18 +37,20 @@ client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
38
  MAIN_LLM_MODEL = "mistralai/Mistral-Nemo-Instruct-2407"
39
  REASONING_LLM_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
40
  CRITIC_LLM_MODEL = "Qwen/QwQ-32B-Preview"
41
- ENSEMBLE_MODELS = [MAIN_LLM_MODEL, REASONING_LLM_MODEL, CRITIC_LLM_MODEL]
42
 
43
- MAX_ITERATIONS = 20
44
- TIMEOUT = 120
45
- RETRY_DELAY = 5
46
- NUM_RESULTS = 15
47
  SIMILARITY_THRESHOLD = 0.15
48
- MAX_CONTEXT_ITEMS = 30
49
- MAX_HISTORY_ITEMS = 8
50
- MAX_FULL_TEXT_LENGTH = 10000
51
  FAISS_INDEX_PATH = "research_index.faiss"
52
  RESEARCH_DATA_PATH = "research_data.pkl"
 
 
53
 
54
  try:
55
  main_similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
@@ -61,7 +62,7 @@ try:
61
  index = faiss.read_index(FAISS_INDEX_PATH)
62
  logger.info(f"Loaded FAISS index from {FAISS_INDEX_PATH}")
63
  else:
64
- index = faiss.IndexFlatIP(embedding_dim)
65
  logger.info("Created a new FAISS index.")
66
  except Exception as e:
67
  logger.error(f"Failed to load models or initialize FAISS: {e}")
@@ -97,6 +98,29 @@ def load_research_data():
97
  logger.info("No existing research data found.")
98
  return {}
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  def hf_inference(model_name, prompt, max_tokens=2000, retries=5):
101
  for attempt in range(retries):
102
  try:
@@ -137,11 +161,11 @@ def ensemble_inference(prompt, models=ENSEMBLE_MODELS, max_tokens=1500):
137
  for result in results:
138
  synthesis_prompt += f"Expert {results.index(result) + 1} ({result['model'].split('/')[-1]}):\n{result['text']}\n\n"
139
 
140
- synthesis = hf_inference(MAIN_LLM_MODEL, synthesis_prompt)
141
  if "generated_text" in synthesis:
142
  return synthesis
143
  else:
144
- return {"generated_text": max(results, key=lambda x: len(x["text"]))["text"]}
145
 
146
  def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str = "moderate",
147
  time_filter: Optional[str] = None, region: str = "wt-wt", language: str = "en-us") -> list:
@@ -170,19 +194,6 @@ def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str
170
  return []
171
  except Exception as e:
172
  logger.error(f"DuckDuckGo search error: {e}")
173
- try:
174
- with DDGS() as ddgs:
175
- results = [r for r in ddgs.text(
176
- keywords=query,
177
- max_results=num_results,
178
- safesearch=safesearch,
179
- region=region,
180
- hreflang=language
181
- )]
182
- if results:
183
- return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
184
- except Exception as e2:
185
- logger.error(f"Fallback DuckDuckGo search also failed: {e2}")
186
  return []
187
 
188
  def tool_search_arxiv(query: str, max_results: int = 5) -> list:
@@ -284,9 +295,9 @@ def tool_search_scholar(query: str, max_results: int = 5) -> list:
284
  def extract_article_content(url: str) -> str:
285
  try:
286
  downloaded = fetch_url(url)
287
- if downloaded is None: # Handle potential download failures
288
  return ""
289
- return extract(downloaded, favor_precision=True) #Added favor_precision
290
  except Exception as e:
291
  logger.error(f"Failed to extract article content from {url}: {e}")
292
  return ""
@@ -304,7 +315,7 @@ def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
304
 
305
  results_by_source = {}
306
  for i, result in enumerate(search_results):
307
- source = result.get('source', 'Web Search')
308
  if source not in results_by_source:
309
  results_by_source[source] = []
310
  results_by_source[source].append((i, result))
@@ -320,15 +331,15 @@ def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
320
  reasoning_input += "\n"
321
 
322
  if reasoning_context:
323
- recent_context = reasoning_context[-MAX_HISTORY_ITEMS:]
324
  reasoning_input += "\nPrevious Reasoning Context:\n" + "\n".join(recent_context)
325
 
326
  if critique:
327
  reasoning_input += f"\n\nRecent critique to address: {critique}\n"
328
 
329
- reasoning_input += "\nProvide a thorough, nuanced analysis that builds upon previous reasoning if applicable. Consider multiple perspectives, potential contradictions in the search results, and the reliability of different sources."
330
 
331
- reasoning_output = ensemble_inference(reasoning_input)
332
 
333
  if isinstance(reasoning_output, dict) and "generated_text" in reasoning_output:
334
  return reasoning_output["generated_text"].strip()
@@ -342,7 +353,7 @@ def tool_summarize(insights: list, prompt: str, contradictions: list = []) -> st
342
 
343
  summarization_input = f"Synthesize the following insights into a cohesive and comprehensive summary regarding: '{prompt}'\n\n"
344
 
345
- max_tokens = 12000
346
  selected_insights = []
347
  token_count = get_token_count(summarization_input) + get_token_count("\n\n".join(contradictions))
348
 
@@ -403,7 +414,7 @@ def tool_critique_reasoning(reasoning_output: str, prompt: str,
403
 
404
  critique_input += "Identify any flaws, biases, logical fallacies, unsupported claims, or areas for improvement. Be specific and constructive. Suggest concrete ways to enhance the reasoning. Also evaluate the strength of evidence and whether conclusions are proportionate to the available information."
405
 
406
- critique_output = hf_inference(CRITIC_LLM_MODEL, critique_input)
407
 
408
  if isinstance(critique_output, dict) and "generated_text" in critique_output:
409
  return critique_output["generated_text"].strip()
@@ -415,7 +426,7 @@ def tool_identify_contradictions(insights: list) -> list:
415
  if len(insights) < 2:
416
  return []
417
 
418
- max_tokens = 12000
419
  selected_insights = []
420
  token_count = 0
421
 
@@ -430,13 +441,13 @@ def tool_identify_contradictions(insights: list) -> list:
430
  contradiction_input = "Identify specific contradictions in these insights:\n\n" + "\n\n".join(selected_insights)
431
  contradiction_input += "\n\nList each contradiction as a separate numbered point. For each contradiction, cite the specific claims that are in tension and evaluate which claim is better supported. If no contradictions exist, respond with 'No contradictions found.'"
432
 
433
- contradiction_output = hf_inference(CRITIC_LLM_MODEL, contradiction_input)
434
 
435
  if isinstance(contradiction_output, dict) and "generated_text" in contradiction_output:
436
  result = contradiction_output["generated_text"].strip()
437
  if result == "No contradictions found.":
438
  return []
439
-
440
  contradictions = re.findall(r'\d+\.\s+(.*?)(?=\d+\.|$)', result, re.DOTALL)
441
  return [c.strip() for c in contradictions if c.strip()]
442
 
@@ -456,10 +467,11 @@ def tool_identify_focus_areas(prompt: str, insights: list = [],
456
 
457
  focus_input += "Identify 3-5 specific aspects that should be investigated further to get a complete understanding. Be precise and prioritize underexplored areas. For each suggested area, briefly explain why it's important to investigate."
458
 
459
- focus_output = hf_inference(MAIN_LLM_MODEL, focus_input)
460
 
461
  if isinstance(focus_output, dict) and "generated_text" in focus_output:
462
  result = focus_output["generated_text"].strip()
 
463
  areas = re.findall(r'(?:^|\n)(?:\d+\.|\*|\-)\s*(.*?)(?=(?:\n(?:\d+\.|\*|\-|$))|$)', result)
464
  return [area.strip() for area in areas if area.strip()][:5]
465
 
@@ -472,7 +484,7 @@ def add_to_faiss_index(text: str):
472
  if embedding_np.shape[1] != embedding_dim:
473
  logger.error(f"Embedding dimension mismatch: expected {embedding_dim}, got {embedding_np.shape[1]}")
474
  return
475
- faiss.normalize_L2(embedding_np)
476
  index.add(embedding_np)
477
 
478
  def search_faiss_index(query: str, top_k: int = 5) -> List[str]:
@@ -497,7 +509,7 @@ def filter_results(search_results, prompt, previous_snippets=None):
497
  for result in search_results:
498
  combined_text = result['title'] + " " + result['snippet']
499
 
500
- if result['snippet'] in seen_snippets:
501
  continue
502
 
503
  result_embedding = main_similarity_model.encode(combined_text, convert_to_tensor=True)
@@ -506,16 +518,16 @@ def filter_results(search_results, prompt, previous_snippets=None):
506
  if cosine_score >= SIMILARITY_THRESHOLD:
507
  result['relevance_score'] = cosine_score
508
  filtered_results.append(result)
509
- seen_snippets.add(result['snippet'])
510
  add_to_faiss_index(result['snippet'])
511
 
512
 
513
- filtered_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
514
  return filtered_results
515
 
516
  except Exception as e:
517
  logger.error(f"Error during filtering: {e}")
518
- return search_results
519
 
520
  def tool_extract_key_entities(prompt: str) -> list:
521
  entity_input = f"Extract the key entities (people, organizations, concepts, technologies, events, time periods, locations, etc.) from this research prompt that should be investigated individually:\n\n{prompt}\n\nList the 5-7 most important entities, one per line, with a brief explanation (2-3 sentences) of why each is central to the research question."
@@ -525,7 +537,7 @@ def tool_extract_key_entities(prompt: str) -> list:
525
  if isinstance(entity_output, dict) and "generated_text" in entity_output:
526
  result = entity_output["generated_text"].strip()
527
  entities = [e.strip() for e in result.split('\n') if e.strip()]
528
- return entities[:7]
529
 
530
  logger.error(f"Failed to extract key entities: {entity_output}")
531
  return []
@@ -538,11 +550,11 @@ def tool_meta_analyze(entity_insights: Dict[str, list], prompt: str) -> str:
538
 
539
  for entity, insights in entity_insights.items():
540
  if insights:
541
- meta_input += f"\n--- {entity} ---\n" + insights[-1] + "\n"
542
 
543
  meta_input += "\nProvide a high-level synthesis that identifies:\n1. Common themes across entities\n2. Important differences and contradictions\n3. How these entities interact or influence each other\n4. The broader implications for the original research question\n5. A systems-level understanding of how these elements fit together"
544
 
545
- meta_output = ensemble_inference(meta_input)
546
 
547
  if isinstance(meta_output, dict) and "generated_text" in meta_output:
548
  return meta_output["generated_text"].strip()
@@ -567,7 +579,7 @@ def tool_draft_research_plan(prompt: str, entities: list, focus_areas: list = []
567
  plan_input += "5. Potential challenges and how to address them\n"
568
  plan_input += "6. Criteria for evaluating the quality of findings"
569
 
570
- plan_output = hf_inference(REASONING_LLM_MODEL, plan_input)
571
 
572
  if isinstance(plan_output, dict) and "generated_text" in plan_output:
573
  return plan_output["generated_text"].strip()
@@ -576,10 +588,31 @@ def tool_draft_research_plan(prompt: str, entities: list, focus_areas: list = []
576
  return "Could not generate a research plan due to an error."
577
 
578
  def tool_extract_article(url: str) -> str:
579
- # Use trafilatura's extraction function
580
  extracted_text = extract_article_content(url)
581
  return extracted_text if extracted_text else f"Could not extract content from {url}"
582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
  tools = {
584
  "search_web": {
585
  "function": tool_search_web,
@@ -630,6 +663,13 @@ tools = {
630
  "parameters": {
631
  "url": {"type": "string", "description": "The URL of the article to extract"}
632
  },
 
 
 
 
 
 
 
633
  },
634
  "reason": {
635
  "function": tool_reason,
@@ -734,16 +774,13 @@ Available Tools:
734
  Instructions:
735
  Select the BEST tool and parameters for the current research stage. Output valid JSON. If no tool is appropriate, respond with {}.
736
  Only use provided tools. Be strategic about which tool to use next based on the research progress so far.
737
-
738
  You MUST be methodical. Think step-by-step:
739
-
740
  1. **Plan:** If it's the very beginning, extract key entities, identify focus areas, and then draft a research plan.
741
  2. **Search:** Use a variety of search tools. Start with broad searches, then narrow down. Use specific search tools (arXiv, PubMed, Scholar) for relevant topics.
742
  3. **Analyze:** Reason deeply about search results, and critique your reasoning. Identify contradictions. Filter and use FAISS index for relevant information.
743
  4. **Refine:** If results are poor, generate *better* search queries. Adjust focus areas.
744
  5. **Iterate:** Repeat steps 2-4, focusing on different entities and aspects.
745
  6. **Synthesize:** Finally, summarize the findings, addressing contradictions.
746
-
747
  Example:
748
  {"tool": "search_web", "parameters": {"query": "Eiffel Tower location"}}
749
  Output:
@@ -753,7 +790,8 @@ Output:
753
  def deep_research(prompt):
754
  task_description = "You are an advanced research assistant. Use available tools iteratively, focus on different aspects, follow promising leads, critically evaluate your findings, and build up a comprehensive understanding. Utilize the FAISS index to avoid redundant searches and build a persistent knowledge base."
755
  research_data = load_research_data()
756
-
 
757
  context = research_data.get('context', [])
758
  all_insights = research_data.get('all_insights', [])
759
  entity_specific_insights = research_data.get('entity_specific_insights', {})
@@ -776,47 +814,50 @@ def deep_research(prompt):
776
  logger.info("Initialized a fresh FAISS Index")
777
 
778
  key_entities_with_descriptions = tool_extract_key_entities(prompt=prompt)
779
- key_entities = [e.split(":")[0].strip() for e in key_entities_with_descriptions]
780
  if key_entities:
781
  context.append(f"Identified key entities: {key_entities}")
782
  intermediate_output += f"Identified key entities for focused research: {key_entities_with_descriptions}\n"
783
 
 
784
  entity_progress = {entity: {'queries': [], 'insights': []} for entity in key_entities}
785
- entity_progress['general'] = {'queries': [], 'insights': []}
786
  for entity in key_entities + ['general']:
787
- if entity in research_data:
788
  entity_progress[entity]['queries'] = research_data[entity]['queries']
789
  entity_progress[entity]['insights'] = research_data[entity]['insights']
790
 
791
  if not focus_areas: # Corrected placement: outside the loop
792
  initial_focus_areas = tool_identify_focus_areas(prompt=prompt)
793
  research_plan = tool_draft_research_plan(prompt=prompt, entities=key_entities, focus_areas=initial_focus_areas)
794
- context.append(f"Initial Research Plan: {research_plan[:200]}...")
795
  intermediate_output += f"Initial Research Plan:\n{research_plan}\n\n"
796
  focus_areas = initial_focus_areas
797
 
798
 
799
  for i in range(MAX_ITERATIONS):
800
- if key_entities and i > 0:
801
- entities_to_process = key_entities + ['general']
 
802
  current_entity = entities_to_process[i % len(entities_to_process)]
803
  else:
804
- current_entity = 'general'
805
 
806
  context.append(f"Current focus: {current_entity}")
807
 
808
- if i > 0:
 
809
  faiss_results_indices = search_faiss_index(prompt if current_entity == 'general' else f"{prompt} {current_entity}")
810
  faiss_context = []
811
  for idx in faiss_results_indices:
812
- if idx < len(all_insights):
813
  faiss_context.append(f"Previously found insight: {all_insights[idx]}")
814
  if faiss_context:
815
- context.extend(faiss_context)
816
  intermediate_output += f"Iteration {i+1} - Retrieved {len(faiss_context)} relevant items from FAISS index.\n"
817
 
818
 
819
- if i == 0:
820
  initial_query = tool_generate_search_query(prompt=prompt)
821
  if initial_query:
822
  previous_queries.append(initial_query)
@@ -861,6 +902,7 @@ def deep_research(prompt):
861
  previous_queries.append(entity_query)
862
  entity_progress[current_entity]['queries'].append(entity_query)
863
 
 
864
  with ThreadPoolExecutor(max_workers=5) as executor:
865
  futures = [
866
  executor.submit(tool_search_web, query=entity_query, num_results=NUM_RESULTS//2),
@@ -874,9 +916,10 @@ def deep_research(prompt):
874
  for future in as_completed(futures):
875
  search_results.extend(future.result())
876
 
 
877
  filtered_search_results = filter_results(search_results,
878
  f"{prompt} {current_entity}",
879
- previous_snippets=seen_snippets)
880
 
881
  if filtered_search_results:
882
  context.append(f"Entity Search for {current_entity}: {len(filtered_search_results)} results")
@@ -884,7 +927,7 @@ def deep_research(prompt):
884
  entity_reasoning = tool_reason(
885
  prompt=f"{prompt} focusing on {current_entity}",
886
  search_results=filtered_search_results,
887
- reasoning_context=entity_progress[current_entity]['insights'],
888
  focus_areas=focus_areas
889
  )
890
 
@@ -915,17 +958,17 @@ def deep_research(prompt):
915
 
916
  try:
917
  response_text = llm_response["generated_text"].strip()
918
- response_json = json.loads(response_text)
919
  intermediate_output += f"Iteration {i+1} - Focus: {current_entity} - Action: {response_text}\n"
920
  except json.JSONDecodeError:
921
  intermediate_output += f"Iteration {i+1} - LLM Response (Invalid JSON): {llm_response['generated_text'][:100]}...\n"
922
- context.append(f"Invalid JSON: {llm_response['generated_text'][:100]}...")
923
  continue
924
 
925
  tool_name = response_json.get("tool")
926
  parameters = response_json.get("parameters", {})
927
 
928
- if not tool_name:
929
  if all_insights:
930
  if i > MAX_ITERATIONS // 2:
931
  break
@@ -947,7 +990,7 @@ def deep_research(prompt):
947
  result = tool["function"](**parameters)
948
 
949
  if current_entity != 'general':
950
- entity_progress[current_entity]['queries'].append(result)
951
 
952
  previous_queries.append(result)
953
 
@@ -959,12 +1002,13 @@ def deep_research(prompt):
959
 
960
  filtered_result = filter_results(result, search_prompt, previous_snippets=seen_snippets)
961
 
962
- result = filtered_result
963
 
964
- if not result and 'query' in parameters:
965
  failed_queries.append(parameters['query'])
966
 
967
  elif tool_name == "reason":
 
968
  if current_entity != 'general' and 'reasoning_context' not in parameters:
969
  parameters['reasoning_context'] = entity_progress[current_entity]['insights']
970
  elif 'reasoning_context' not in parameters:
@@ -977,9 +1021,9 @@ def deep_research(prompt):
977
  parameters['prompt'] = prompt
978
 
979
  if 'search_results' not in parameters:
980
- parameters['search_results'] = []
981
 
982
- if 'focus_areas' not in parameters and focus_areas:
983
  parameters['focus_areas'] = focus_areas
984
 
985
  result = tool["function"](**parameters)
@@ -987,20 +1031,20 @@ def deep_research(prompt):
987
  if current_entity != 'general':
988
  entity_progress[current_entity]['insights'].append(result)
989
  if current_entity not in entity_specific_insights:
990
- entity_specific_insights[current_entity] = []
991
  entity_specific_insights[current_entity].append(result)
992
  else:
993
- reasoning_context.append(result)
994
  add_to_faiss_index(result)
995
  all_insights.append(result)
996
 
997
  elif tool_name == "critique_reasoning":
998
- if 'previous_critiques' not in parameters:
999
  parameters['previous_critiques'] = previous_critiques
1000
 
1001
  if all_insights:
1002
  if 'reasoning_output' not in parameters:
1003
- parameters['reasoning_output'] = all_insights[-1]
1004
  if 'prompt' not in parameters:
1005
  parameters['prompt'] = prompt
1006
 
@@ -1013,7 +1057,7 @@ def deep_research(prompt):
1013
  elif tool_name == "identify_contradictions":
1014
  result = tool["function"](**parameters)
1015
  if result:
1016
- contradictions = result
1017
  context.append(f"Identified contradictions: {result}")
1018
 
1019
  elif tool_name == "identify_focus_areas":
@@ -1022,19 +1066,28 @@ def deep_research(prompt):
1022
  result = tool["function"](**parameters)
1023
  if result:
1024
  old_focus = set(focus_areas)
1025
- focus_areas = result
1026
- failed_areas.extend([area for area in old_focus if area not in result])
1027
  context.append(f"New focus areas: {result}")
1028
 
1029
  elif tool_name == "extract_article":
1030
  result = tool["function"](**parameters)
1031
  if result:
1032
  context.append(f"Extracted article content from {parameters['url']}: {result[:200]}...")
 
1033
  reasoning_about_article = tool_reason(prompt=prompt, search_results=[{"title": "Extracted Article", "snippet": result, "url": parameters['url']}])
1034
  if reasoning_about_article:
1035
  all_insights.append(reasoning_about_article)
1036
  add_to_faiss_index(reasoning_about_article)
1037
-
 
 
 
 
 
 
 
 
1038
 
1039
  elif tool_name == "meta_analyze":
1040
  if 'entity_insights' not in parameters:
@@ -1043,12 +1096,13 @@ def deep_research(prompt):
1043
  parameters['prompt'] = prompt
1044
  result = tool["function"](**parameters)
1045
  if result:
1046
- all_insights.append(result)
1047
  context.append(f"Meta-analysis across entities: {result[:200]}...")
1048
  add_to_faiss_index(result)
1049
 
 
1050
  elif tool_name == "draft_research_plan":
1051
- result = "Research plan already generated."
1052
 
1053
  else:
1054
  result = tool["function"](**parameters)
@@ -1059,6 +1113,7 @@ def deep_research(prompt):
1059
 
1060
  intermediate_output += f"Iteration {i+1} - Result: {result_str}\n"
1061
 
 
1062
  result_context = result_str
1063
  if len(result_str) > 300:
1064
  result_context = result_str[:300] + "..."
@@ -1070,6 +1125,7 @@ def deep_research(prompt):
1070
  intermediate_output += f"Iteration {i+1} - Error: {str(e)}\n"
1071
  continue
1072
 
 
1073
  research_data = {
1074
  'context': context,
1075
  'all_insights': all_insights,
@@ -1085,22 +1141,24 @@ def deep_research(prompt):
1085
  'research_session_id': research_session_id
1086
  }
1087
  for entity in entity_progress:
1088
- research_data[entity] = entity_progress[entity]
1089
  save_research_data(research_data, index)
1090
 
1091
 
 
1092
  if len(entity_specific_insights) > 1 and len(all_insights) > 2:
1093
  meta_analysis = tool_meta_analyze(entity_insights=entity_specific_insights, prompt=prompt)
1094
  if meta_analysis:
1095
  all_insights.append(meta_analysis)
1096
  intermediate_output += f"Final Meta-Analysis: {meta_analysis[:500]}...\n"
1097
- add_to_faiss_index(meta_analysis)
1098
 
1099
  if all_insights:
1100
- final_result = tool_summarize(all_insights, prompt, contradictions)
1101
  else:
1102
  final_result = "Could not find meaningful information despite multiple attempts."
1103
 
 
1104
  full_output = f"**Research Prompt:** {prompt}\n\n"
1105
 
1106
  if key_entities_with_descriptions:
 
17
  import scholarly
18
  import pymed
19
  import wikipedia
20
+ import trafilatura
 
21
  from trafilatura import extract, fetch_url
22
  import pickle
23
  import faiss
 
37
  MAIN_LLM_MODEL = "mistralai/Mistral-Nemo-Instruct-2407"
38
  REASONING_LLM_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
39
  CRITIC_LLM_MODEL = "Qwen/QwQ-32B-Preview"
40
+ ENSEMBLE_MODELS = [MAIN_LLM_MODEL, REASONING_LLM_MODEL, CRITIC_LLM_MODEL] # Keep, but expand upon.
41
 
42
+ MAX_ITERATIONS = 40 # Increased for deeper research.
43
+ TIMEOUT = 180 # Longer timeout for larger models / complex tasks.
44
+ RETRY_DELAY = 10 # longer delay
45
+ NUM_RESULTS = 20
46
  SIMILARITY_THRESHOLD = 0.15
47
+ MAX_CONTEXT_ITEMS = 50 # Increased context window.
48
+ MAX_HISTORY_ITEMS = 12
49
+ MAX_FULL_TEXT_LENGTH = 20000 # larger document size
50
  FAISS_INDEX_PATH = "research_index.faiss"
51
  RESEARCH_DATA_PATH = "research_data.pkl"
52
+ PAPER_SUMMARIES_PATH = "paper_summaries.pkl" # New path for storing paper summaries
53
+
54
 
55
  try:
56
  main_similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
 
62
  index = faiss.read_index(FAISS_INDEX_PATH)
63
  logger.info(f"Loaded FAISS index from {FAISS_INDEX_PATH}")
64
  else:
65
+ index = faiss.IndexFlatIP(embedding_dim) # Use IndexFlatIP for inner product (cosine similarity).
66
  logger.info("Created a new FAISS index.")
67
  except Exception as e:
68
  logger.error(f"Failed to load models or initialize FAISS: {e}")
 
98
  logger.info("No existing research data found.")
99
  return {}
100
 
101
+ def save_paper_summaries(summaries: Dict[str, str]):
102
+ try:
103
+ with open(PAPER_SUMMARIES_PATH, "wb") as f:
104
+ pickle.dump(summaries, f)
105
+ logger.info(f"Paper summaries saved to {PAPER_SUMMARIES_PATH}")
106
+ except Exception as e:
107
+ logger.error(f"Error saving paper summaries: {e}")
108
+
109
+ def load_paper_summaries() -> Dict[str, str]:
110
+ if os.path.exists(PAPER_SUMMARIES_PATH):
111
+ try:
112
+ with open(PAPER_SUMMARIES_PATH, "rb") as f:
113
+ data = pickle.load(f)
114
+ logger.info(f"Loaded paper summaries from {PAPER_SUMMARIES_PATH}")
115
+ return data
116
+ except Exception as e:
117
+ logger.error(f"Error loading paper summaries: {e}")
118
+ return {}
119
+ else:
120
+ logger.info("No existing paper summaries found.")
121
+ return {}
122
+
123
+
124
  def hf_inference(model_name, prompt, max_tokens=2000, retries=5):
125
  for attempt in range(retries):
126
  try:
 
161
  for result in results:
162
  synthesis_prompt += f"Expert {results.index(result) + 1} ({result['model'].split('/')[-1]}):\n{result['text']}\n\n"
163
 
164
+ synthesis = hf_inference(MAIN_LLM_MODEL, synthesis_prompt) # Use a consistent model for final synthesis
165
  if "generated_text" in synthesis:
166
  return synthesis
167
  else:
168
+ return {"generated_text": max(results, key=lambda x: len(x["text"]))["text"]} # Fallback
169
 
170
  def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str = "moderate",
171
  time_filter: Optional[str] = None, region: str = "wt-wt", language: str = "en-us") -> list:
 
194
  return []
195
  except Exception as e:
196
  logger.error(f"DuckDuckGo search error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  return []
198
 
199
  def tool_search_arxiv(query: str, max_results: int = 5) -> list:
 
295
  def extract_article_content(url: str) -> str:
296
  try:
297
  downloaded = fetch_url(url)
298
+ if downloaded is None:
299
  return ""
300
+ return extract(downloaded, favor_precision=True)
301
  except Exception as e:
302
  logger.error(f"Failed to extract article content from {url}: {e}")
303
  return ""
 
315
 
316
  results_by_source = {}
317
  for i, result in enumerate(search_results):
318
+ source = result.get('source', 'Web Search') # Default to 'Web Search'
319
  if source not in results_by_source:
320
  results_by_source[source] = []
321
  results_by_source[source].append((i, result))
 
331
  reasoning_input += "\n"
332
 
333
  if reasoning_context:
334
+ recent_context = reasoning_context[-MAX_HISTORY_ITEMS:] # Limit history
335
  reasoning_input += "\nPrevious Reasoning Context:\n" + "\n".join(recent_context)
336
 
337
  if critique:
338
  reasoning_input += f"\n\nRecent critique to address: {critique}\n"
339
 
340
+ reasoning_input += "\nProvide a thorough, nuanced analysis that builds upon previous reasoning if applicable. Consider multiple perspectives, potential contradictions in the search results, and the reliability of different sources. Address any specific critiques."
341
 
342
+ reasoning_output = ensemble_inference(reasoning_input) # Use ensemble for high-quality reasoning.
343
 
344
  if isinstance(reasoning_output, dict) and "generated_text" in reasoning_output:
345
  return reasoning_output["generated_text"].strip()
 
353
 
354
  summarization_input = f"Synthesize the following insights into a cohesive and comprehensive summary regarding: '{prompt}'\n\n"
355
 
356
+ max_tokens = 12000 # Increased token limit
357
  selected_insights = []
358
  token_count = get_token_count(summarization_input) + get_token_count("\n\n".join(contradictions))
359
 
 
414
 
415
  critique_input += "Identify any flaws, biases, logical fallacies, unsupported claims, or areas for improvement. Be specific and constructive. Suggest concrete ways to enhance the reasoning. Also evaluate the strength of evidence and whether conclusions are proportionate to the available information."
416
 
417
+ critique_output = hf_inference(CRITIC_LLM_MODEL, critique_input) # Use specialized critique model.
418
 
419
  if isinstance(critique_output, dict) and "generated_text" in critique_output:
420
  return critique_output["generated_text"].strip()
 
426
  if len(insights) < 2:
427
  return []
428
 
429
+ max_tokens = 12000 # Increased token limit for potentially more contradictions
430
  selected_insights = []
431
  token_count = 0
432
 
 
441
  contradiction_input = "Identify specific contradictions in these insights:\n\n" + "\n\n".join(selected_insights)
442
  contradiction_input += "\n\nList each contradiction as a separate numbered point. For each contradiction, cite the specific claims that are in tension and evaluate which claim is better supported. If no contradictions exist, respond with 'No contradictions found.'"
443
 
444
+ contradiction_output = hf_inference(CRITIC_LLM_MODEL, contradiction_input) # Use critique model
445
 
446
  if isinstance(contradiction_output, dict) and "generated_text" in contradiction_output:
447
  result = contradiction_output["generated_text"].strip()
448
  if result == "No contradictions found.":
449
  return []
450
+ # More robust contradiction extraction, handles multi-sentence contradictions
451
  contradictions = re.findall(r'\d+\.\s+(.*?)(?=\d+\.|$)', result, re.DOTALL)
452
  return [c.strip() for c in contradictions if c.strip()]
453
 
 
467
 
468
  focus_input += "Identify 3-5 specific aspects that should be investigated further to get a complete understanding. Be precise and prioritize underexplored areas. For each suggested area, briefly explain why it's important to investigate."
469
 
470
+ focus_output = hf_inference(MAIN_LLM_MODEL, focus_input) # Consistent model
471
 
472
  if isinstance(focus_output, dict) and "generated_text" in focus_output:
473
  result = focus_output["generated_text"].strip()
474
+ # More robust extraction, handles different list formats
475
  areas = re.findall(r'(?:^|\n)(?:\d+\.|\*|\-)\s*(.*?)(?=(?:\n(?:\d+\.|\*|\-|$))|$)', result)
476
  return [area.strip() for area in areas if area.strip()][:5]
477
 
 
484
  if embedding_np.shape[1] != embedding_dim:
485
  logger.error(f"Embedding dimension mismatch: expected {embedding_dim}, got {embedding_np.shape[1]}")
486
  return
487
+ faiss.normalize_L2(embedding_np) # Normalize for cosine similarity.
488
  index.add(embedding_np)
489
 
490
  def search_faiss_index(query: str, top_k: int = 5) -> List[str]:
 
509
  for result in search_results:
510
  combined_text = result['title'] + " " + result['snippet']
511
 
512
+ if result['snippet'] in seen_snippets: # Prevent exact duplicates
513
  continue
514
 
515
  result_embedding = main_similarity_model.encode(combined_text, convert_to_tensor=True)
 
518
  if cosine_score >= SIMILARITY_THRESHOLD:
519
  result['relevance_score'] = cosine_score
520
  filtered_results.append(result)
521
+ seen_snippets.add(result['snippet']) # Add snippets after filtering
522
  add_to_faiss_index(result['snippet'])
523
 
524
 
525
+ filtered_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True) # Sort by relevance.
526
  return filtered_results
527
 
528
  except Exception as e:
529
  logger.error(f"Error during filtering: {e}")
530
+ return search_results # Return original results on error.
531
 
532
  def tool_extract_key_entities(prompt: str) -> list:
533
  entity_input = f"Extract the key entities (people, organizations, concepts, technologies, events, time periods, locations, etc.) from this research prompt that should be investigated individually:\n\n{prompt}\n\nList the 5-7 most important entities, one per line, with a brief explanation (2-3 sentences) of why each is central to the research question."
 
537
  if isinstance(entity_output, dict) and "generated_text" in entity_output:
538
  result = entity_output["generated_text"].strip()
539
  entities = [e.strip() for e in result.split('\n') if e.strip()]
540
+ return entities[:7] # Limit to top 7 entities
541
 
542
  logger.error(f"Failed to extract key entities: {entity_output}")
543
  return []
 
550
 
551
  for entity, insights in entity_insights.items():
552
  if insights:
553
+ meta_input += f"\n--- {entity} ---\n" + insights[-1] + "\n" # Most recent insight for each entity
554
 
555
  meta_input += "\nProvide a high-level synthesis that identifies:\n1. Common themes across entities\n2. Important differences and contradictions\n3. How these entities interact or influence each other\n4. The broader implications for the original research question\n5. A systems-level understanding of how these elements fit together"
556
 
557
+ meta_output = ensemble_inference(meta_input) # Ensemble for meta-analysis
558
 
559
  if isinstance(meta_output, dict) and "generated_text" in meta_output:
560
  return meta_output["generated_text"].strip()
 
579
  plan_input += "5. Potential challenges and how to address them\n"
580
  plan_input += "6. Criteria for evaluating the quality of findings"
581
 
582
+ plan_output = hf_inference(REASONING_LLM_MODEL, plan_input) # Use reasoning model
583
 
584
  if isinstance(plan_output, dict) and "generated_text" in plan_output:
585
  return plan_output["generated_text"].strip()
 
588
  return "Could not generate a research plan due to an error."
589
 
590
  def tool_extract_article(url: str) -> str:
 
591
  extracted_text = extract_article_content(url)
592
  return extracted_text if extracted_text else f"Could not extract content from {url}"
593
 
594
+ # New tool for summarizing a single paper
595
def tool_summarize_paper(paper_text: str) -> str:
    """Ask the reasoning model for a structured summary of one paper.

    The prompt requests five sections: research questions, methodology,
    key findings, limitations, and implications. Only the first
    ``MAX_FULL_TEXT_LENGTH`` characters of *paper_text* are embedded in the
    prompt.

    Returns the stripped ``generated_text`` of the model response when the
    response is a dict containing that key. Otherwise the failure is logged
    and a fixed error message is returned.
    """
    # Cap the amount of paper text that is sent to the model.
    excerpt = paper_text[:MAX_FULL_TEXT_LENGTH]
    request = f"""Summarize this academic paper, focusing on the following:

1. **Main Research Question(s):** What questions does the paper address?
2. **Methodology:** Briefly describe the methods used (e.g., experiments, surveys, simulations, theoretical analysis).
3. **Key Findings:** What are the most important results or conclusions?
4. **Limitations:** What are the acknowledged limitations of the study?
5. **Implications:** What are the broader implications of the findings, according to the authors?

Paper Text:
{excerpt}
"""
    response = hf_inference(REASONING_LLM_MODEL, request, max_tokens=500)

    # Anything other than a dict carrying "generated_text" counts as a failure.
    if not (isinstance(response, dict) and "generated_text" in response):
        logger.error(f"Failed to generate summary: {response}")
        return "Could not generate a summary due to an error."
    return response["generated_text"].strip()
614
+
615
+
616
  tools = {
617
  "search_web": {
618
  "function": tool_search_web,
 
663
  "parameters": {
664
  "url": {"type": "string", "description": "The URL of the article to extract"}
665
  },
666
+ },
667
+ "summarize_paper": {
668
+ "function": tool_summarize_paper,
669
+ "description": "Summarizes the content of an academic paper.",
670
+ "parameters": {
671
+ "paper_text": {"type": "string", "description": "The full text of the paper to be summarized."},
672
+ },
673
  },
674
  "reason": {
675
  "function": tool_reason,
 
774
  Instructions:
775
  Select the BEST tool and parameters for the current research stage. Output valid JSON. If no tool is appropriate, respond with {}.
776
  Only use provided tools. Be strategic about which tool to use next based on the research progress so far.
 
777
  You MUST be methodical. Think step-by-step:
 
778
  1. **Plan:** If it's the very beginning, extract key entities, identify focus areas, and then draft a research plan.
779
  2. **Search:** Use a variety of search tools. Start with broad searches, then narrow down. Use specific search tools (arXiv, PubMed, Scholar) for relevant topics.
780
  3. **Analyze:** Reason deeply about search results, and critique your reasoning. Identify contradictions. Filter and use FAISS index for relevant information.
781
  4. **Refine:** If results are poor, generate *better* search queries. Adjust focus areas.
782
  5. **Iterate:** Repeat steps 2-4, focusing on different entities and aspects.
783
  6. **Synthesize:** Finally, summarize the findings, addressing contradictions.
 
784
  Example:
785
  {"tool": "search_web", "parameters": {"query": "Eiffel Tower location"}}
786
  Output:
 
790
  def deep_research(prompt):
791
  task_description = "You are an advanced research assistant. Use available tools iteratively, focus on different aspects, follow promising leads, critically evaluate your findings, and build up a comprehensive understanding. Utilize the FAISS index to avoid redundant searches and build a persistent knowledge base."
792
  research_data = load_research_data()
793
+ paper_summaries = load_paper_summaries() # Load paper summaries
794
+
795
  context = research_data.get('context', [])
796
  all_insights = research_data.get('all_insights', [])
797
  entity_specific_insights = research_data.get('entity_specific_insights', {})
 
814
  logger.info("Initialized a fresh FAISS Index")
815
 
816
  key_entities_with_descriptions = tool_extract_key_entities(prompt=prompt)
817
+ key_entities = [e.split(":")[0].strip() for e in key_entities_with_descriptions] # Extract just entity names
818
  if key_entities:
819
  context.append(f"Identified key entities: {key_entities}")
820
  intermediate_output += f"Identified key entities for focused research: {key_entities_with_descriptions}\n"
821
 
822
+ # Initialize progress tracking for each entity.
823
  entity_progress = {entity: {'queries': [], 'insights': []} for entity in key_entities}
824
+ entity_progress['general'] = {'queries': [], 'insights': []} # For general, non-entity-specific searches
825
  for entity in key_entities + ['general']:
826
+ if entity in research_data: # Load existing progress
827
  entity_progress[entity]['queries'] = research_data[entity]['queries']
828
  entity_progress[entity]['insights'] = research_data[entity]['insights']
829
 
830
  if not focus_areas: # Corrected placement: outside the loop
831
  initial_focus_areas = tool_identify_focus_areas(prompt=prompt)
832
  research_plan = tool_draft_research_plan(prompt=prompt, entities=key_entities, focus_areas=initial_focus_areas)
833
+ context.append(f"Initial Research Plan: {research_plan[:200]}...") # Add plan to context
834
  intermediate_output += f"Initial Research Plan:\n{research_plan}\n\n"
835
  focus_areas = initial_focus_areas
836
 
837
 
838
  for i in range(MAX_ITERATIONS):
839
+ # Entity-focused iteration strategy
840
+ if key_entities and i > 0: # Cycle through entities *after* initial setup
841
+ entities_to_process = key_entities + ['general'] # Include 'general' for broad searches
842
  current_entity = entities_to_process[i % len(entities_to_process)]
843
  else:
844
+ current_entity = 'general' # Start with general research.
845
 
846
  context.append(f"Current focus: {current_entity}")
847
 
848
+ # FAISS Retrieval
849
+ if i > 0: # Use FAISS *after* the first iteration (once we have data)
850
  faiss_results_indices = search_faiss_index(prompt if current_entity == 'general' else f"{prompt} {current_entity}")
851
  faiss_context = []
852
  for idx in faiss_results_indices:
853
+ if idx < len(all_insights): # Check index bounds
854
  faiss_context.append(f"Previously found insight: {all_insights[idx]}")
855
  if faiss_context:
856
+ context.extend(faiss_context) # Add FAISS context
857
  intermediate_output += f"Iteration {i+1} - Retrieved {len(faiss_context)} relevant items from FAISS index.\n"
858
 
859
 
860
+ if i == 0: #Initial broad search
861
  initial_query = tool_generate_search_query(prompt=prompt)
862
  if initial_query:
863
  previous_queries.append(initial_query)
 
902
  previous_queries.append(entity_query)
903
  entity_progress[current_entity]['queries'].append(entity_query)
904
 
905
+
906
  with ThreadPoolExecutor(max_workers=5) as executor:
907
  futures = [
908
  executor.submit(tool_search_web, query=entity_query, num_results=NUM_RESULTS//2),
 
916
  for future in as_completed(futures):
917
  search_results.extend(future.result())
918
 
919
+
920
  filtered_search_results = filter_results(search_results,
921
  f"{prompt} {current_entity}",
922
+ previous_snippets=seen_snippets) # Pass existing snippets
923
 
924
  if filtered_search_results:
925
  context.append(f"Entity Search for {current_entity}: {len(filtered_search_results)} results")
 
927
  entity_reasoning = tool_reason(
928
  prompt=f"{prompt} focusing on {current_entity}",
929
  search_results=filtered_search_results,
930
+ reasoning_context=entity_progress[current_entity]['insights'], # Use entity-specific context
931
  focus_areas=focus_areas
932
  )
933
 
 
958
 
959
  try:
960
  response_text = llm_response["generated_text"].strip()
961
+ response_json = json.loads(response_text) # Parse the JSON response.
962
  intermediate_output += f"Iteration {i+1} - Focus: {current_entity} - Action: {response_text}\n"
963
  except json.JSONDecodeError:
964
  intermediate_output += f"Iteration {i+1} - LLM Response (Invalid JSON): {llm_response['generated_text'][:100]}...\n"
965
+ context.append(f"Invalid JSON: {llm_response['generated_text'][:100]}...") # Add invalid JSON to context
966
  continue
967
 
968
  tool_name = response_json.get("tool")
969
  parameters = response_json.get("parameters", {})
970
 
971
+ if not tool_name: #LLM didn't return a tool. End the process if we are past halfway.
972
  if all_insights:
973
  if i > MAX_ITERATIONS // 2:
974
  break
 
990
  result = tool["function"](**parameters)
991
 
992
  if current_entity != 'general':
993
+ entity_progress[current_entity]['queries'].append(result) # Add entity-specific
994
 
995
  previous_queries.append(result)
996
 
 
1002
 
1003
  filtered_result = filter_results(result, search_prompt, previous_snippets=seen_snippets)
1004
 
1005
+ result = filtered_result # Work with filtered results
1006
 
1007
+ if not result and 'query' in parameters: # Add query to failures if nothing returned.
1008
  failed_queries.append(parameters['query'])
1009
 
1010
  elif tool_name == "reason":
1011
+ # Ensure correct reasoning context is passed.
1012
  if current_entity != 'general' and 'reasoning_context' not in parameters:
1013
  parameters['reasoning_context'] = entity_progress[current_entity]['insights']
1014
  elif 'reasoning_context' not in parameters:
 
1021
  parameters['prompt'] = prompt
1022
 
1023
  if 'search_results' not in parameters:
1024
+ parameters['search_results'] = [] #Avoid errors if no search results.
1025
 
1026
+ if 'focus_areas' not in parameters and focus_areas: # Avoid overwriting focus_areas if already set
1027
  parameters['focus_areas'] = focus_areas
1028
 
1029
  result = tool["function"](**parameters)
 
1031
  if current_entity != 'general':
1032
  entity_progress[current_entity]['insights'].append(result)
1033
  if current_entity not in entity_specific_insights:
1034
+ entity_specific_insights[current_entity] = []
1035
  entity_specific_insights[current_entity].append(result)
1036
  else:
1037
+ reasoning_context.append(result) #Add to general context.
1038
  add_to_faiss_index(result)
1039
  all_insights.append(result)
1040
 
1041
  elif tool_name == "critique_reasoning":
1042
+ if 'previous_critiques' not in parameters: #Pass in the previous critiques.
1043
  parameters['previous_critiques'] = previous_critiques
1044
 
1045
  if all_insights:
1046
  if 'reasoning_output' not in parameters:
1047
+ parameters['reasoning_output'] = all_insights[-1] #Critique the most recent insight.
1048
  if 'prompt' not in parameters:
1049
  parameters['prompt'] = prompt
1050
 
 
1057
  elif tool_name == "identify_contradictions":
1058
  result = tool["function"](**parameters)
1059
  if result:
1060
+ contradictions = result # Keep track of contradictions.
1061
  context.append(f"Identified contradictions: {result}")
1062
 
1063
  elif tool_name == "identify_focus_areas":
 
1066
  result = tool["function"](**parameters)
1067
  if result:
1068
  old_focus = set(focus_areas)
1069
+ focus_areas = result # Update focus areas
1070
+ failed_areas.extend([area for area in old_focus if area not in result]) #Track failed areas
1071
  context.append(f"New focus areas: {result}")
1072
 
1073
  elif tool_name == "extract_article":
1074
  result = tool["function"](**parameters)
1075
  if result:
1076
  context.append(f"Extracted article content from {parameters['url']}: {result[:200]}...")
1077
+ # Reason specifically about the extracted article.
1078
  reasoning_about_article = tool_reason(prompt=prompt, search_results=[{"title": "Extracted Article", "snippet": result, "url": parameters['url']}])
1079
  if reasoning_about_article:
1080
  all_insights.append(reasoning_about_article)
1081
  add_to_faiss_index(reasoning_about_article)
1082
+
1083
+ elif tool_name == "summarize_paper":
1084
+ result = tool["function"](**parameters)
1085
+ if result:
1086
+ paper_summaries[parameters['paper_text'][:100]] = result # Store by a snippet of the text
1087
+ save_paper_summaries(paper_summaries)
1088
+ context.append(f"Summarized paper: {result[:200]}...")
1089
+ add_to_faiss_index(result) # Add the summary itself to FAISS.
1090
+ all_insights.append(result) #Add summary to insights for later summarization.
1091
 
1092
  elif tool_name == "meta_analyze":
1093
  if 'entity_insights' not in parameters:
 
1096
  parameters['prompt'] = prompt
1097
  result = tool["function"](**parameters)
1098
  if result:
1099
+ all_insights.append(result) # Add meta-analysis to overall insights.
1100
  context.append(f"Meta-analysis across entities: {result[:200]}...")
1101
  add_to_faiss_index(result)
1102
 
1103
+
1104
  elif tool_name == "draft_research_plan":
1105
+ result = "Research plan already generated." # Avoid re-generating.
1106
 
1107
  else:
1108
  result = tool["function"](**parameters)
 
1113
 
1114
  intermediate_output += f"Iteration {i+1} - Result: {result_str}\n"
1115
 
1116
+ # Add tool use to context, limit context length
1117
  result_context = result_str
1118
  if len(result_str) > 300:
1119
  result_context = result_str[:300] + "..."
 
1125
  intermediate_output += f"Iteration {i+1} - Error: {str(e)}\n"
1126
  continue
1127
 
1128
+ #Save data
1129
  research_data = {
1130
  'context': context,
1131
  'all_insights': all_insights,
 
1141
  'research_session_id': research_session_id
1142
  }
1143
  for entity in entity_progress:
1144
+ research_data[entity] = entity_progress[entity] #save the individual entity
1145
  save_research_data(research_data, index)
1146
 
1147
 
1148
+ # Perform meta-analysis *before* final summarization, if we have enough entity-specific insights.
1149
  if len(entity_specific_insights) > 1 and len(all_insights) > 2:
1150
  meta_analysis = tool_meta_analyze(entity_insights=entity_specific_insights, prompt=prompt)
1151
  if meta_analysis:
1152
  all_insights.append(meta_analysis)
1153
  intermediate_output += f"Final Meta-Analysis: {meta_analysis[:500]}...\n"
1154
+ add_to_faiss_index(meta_analysis) # Add to FAISS
1155
 
1156
  if all_insights:
1157
+ final_result = tool_summarize(all_insights, prompt, contradictions) # Summarize all insights.
1158
  else:
1159
  final_result = "Could not find meaningful information despite multiple attempts."
1160
 
1161
+
1162
  full_output = f"**Research Prompt:** {prompt}\n\n"
1163
 
1164
  if key_entities_with_descriptions: