mgbam committed
Commit 1193c1f · verified · 1 Parent(s): d179217

Update app.py

Files changed (1):
  1. app.py +111 -59
app.py CHANGED
@@ -1,8 +1,11 @@
-# ------------------------------
-# Enhanced NeuroResearch AI System with Domain Adaptability,
-# Refinement Counter, Dynamic Difficulty Gradient, Meta-Refinement Inspired by LADDER,
-# Quantum Knowledge Graph & Multi-Modal Enhancements
-# ------------------------------
+"""
+Enhanced NeuroResearch AI System
+---------------------------------
+This application integrates domain-adaptive multi-modal retrieval, ensemble cognitive processing,
+and dynamic knowledge graph construction. It is designed for advanced technical research,
+analysis, and reporting, employing triple-redundant API requests and a structured state workflow.
+"""
+
 import logging
 import os
 import re
@@ -31,7 +34,7 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from langchain.tools.retriever import create_retriever_tool

-# Increase Python's recursion limit (if needed)
+# Increase Python's recursion limit if needed
 sys.setrecursionlimit(1000)

 # ------------------------------
@@ -52,15 +55,20 @@ class AgentState(TypedDict):
     metadata: Dict[str, Any]

 # ------------------------------
-# Configuration
+# Application Configuration
 # ------------------------------
 class ResearchConfig:
+    # Environment & API configuration
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
+
+    # Document processing settings
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
+
+    # Mapping of documents to research topics
     DOCUMENT_MAP = {
         "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
             "CV-Transformer Hybrid Architecture",
@@ -69,19 +77,24 @@ class ResearchConfig:
         "Latest Trends in Machine Learning Methods Using Quantum Computing":
             "Quantum ML Frontiers"
     }
+
+    # Template for detailed analysis using Markdown and LaTeX formatting
     ANALYSIS_TEMPLATE = (
-        "Let's think step by step. Synthesize a comprehensive technical report based on the following documents. Focus on identifying the key innovations, empirical results, and potential limitations. Explicitly state any assumptions made during your analysis. The report MUST be valid Markdown, and all mathematical notation MUST be correctly formatted LaTeX (e.g., `E=mc^2`).\n\n"
+        "Let's think step by step. Synthesize a comprehensive technical report based on the following documents. "
+        "Focus on identifying the key innovations, empirical results, and potential limitations. Explicitly state any assumptions made during your analysis. "
+        "The report MUST be valid Markdown, and all mathematical notation MUST be correctly formatted LaTeX (e.g., `E=mc^2`).\n\n"
         "Documents:\n{context}\n\n"
         "Respond with the following structure:\n"
         "# Technical Analysis Report\n\n"
-        "1. **Key Technical Contributions:** (Bullet points highlighting the main innovations)\n"
-        "2. **Novel Methodologies:** (Detailed explanation of the new methods used)\n"
-        "3. **Empirical Results:** (Quantitative results with specific metrics, e.g., accuracy, precision, recall, F1-score. Include confidence intervals where appropriate.)\n"
-        "4. **Potential Applications:** (Real-world applications of the technology)\n"
-        "5. **Limitations and Future Directions:** (Current limitations and suggestions for future research)\n\n"
+        "1. **Key Technical Contributions:** (Bullet points highlighting the main innovations)\n"
+        "2. **Novel Methodologies:** (Detailed explanation of the new methods used)\n"
+        "3. **Empirical Results:** (Quantitative results with specific metrics, e.g., accuracy, precision, recall, F1-score. Include confidence intervals where appropriate.)\n"
+        "4. **Potential Applications:** (Real-world applications of the technology)\n"
+        "5. **Limitations and Future Directions:** (Current limitations and suggestions for future research)\n\n"
         "Format: Markdown with LaTeX mathematical notation where applicable."
     )
-
+
+    # Domain-specific fallback analyses and prompts
     DOMAIN_FALLBACKS = {
         "biomedical research": """
 # Biomedical Research Analysis
@@ -151,30 +164,35 @@ class ResearchConfig:
     }
     DOMAIN_PROMPTS = {
         "biomedical research": """
-        Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs. For example, discuss how a new drug might impact patient survival rates or how a new diagnostic technique might improve early detection of a disease. Discuss specific clinical studies if available.
-        """,
+        Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs. For example, discuss how a new drug might impact patient survival rates or how a new diagnostic technique might improve early detection of a disease. Discuss specific clinical studies if available.
+        """,
         "legal research": """
-        Emphasize legal precedents, case law, and nuanced statutory interpretations. For example, when analyzing a case, identify the key holdings, explain the legal reasoning behind the decision, and compare it to other relevant cases. If a statute is involved, discuss how the court interpreted the statute and whether there are any ambiguities or conflicts with other laws.
-        """,
+        Emphasize legal precedents, case law, and nuanced statutory interpretations. For example, when analyzing a case, identify the key holdings, explain the legal reasoning behind the decision, and compare it to other relevant cases. If a statute is involved, discuss how the court interpreted the statute and whether there are any ambiguities or conflicts with other laws.
+        """,
         "environmental and energy studies": """
-        Highlight renewable energy technologies, efficiency metrics, and policy implications. Provide specific data points on energy consumption and environmental impact. For instance, compare the energy efficiency of solar panels from different manufacturers, or discuss the impact of a specific environmental regulation on air quality.
-        """,
+        Highlight renewable energy technologies, efficiency metrics, and policy implications. Provide specific data points on energy consumption and environmental impact. For instance, compare the energy efficiency of solar panels from different manufacturers, or discuss the impact of a specific environmental regulation on air quality.
+        """,
         "competitive programming and theoretical computer science": """
-        Focus on algorithmic complexity, innovative proofs, and computational techniques. For example, analyze the time and space complexity of a new algorithm, or explain the key steps in a mathematical proof. Include pseudocode or code snippets where appropriate.
-        """,
+        Focus on algorithmic complexity, innovative proofs, and computational techniques. For example, analyze the time and space complexity of a new algorithm, or explain the key steps in a mathematical proof. Include pseudocode or code snippets where appropriate.
+        """,
         "social sciences": """
-        Concentrate on economic trends, sociological data, and correlations impacting public policy. For example, analyze the impact of a new social program on poverty rates, or discuss the relationship between education levels and income inequality. Cite specific studies and statistical data to support your claims.
-        """
+        Concentrate on economic trends, sociological data, and correlations impacting public policy. For example, analyze the impact of a new social program on poverty rates, or discuss the relationship between education levels and income inequality. Cite specific studies and statistical data to support your claims.
+        """
     }
+
+    # Ensemble model settings
     ENSEMBLE_MODELS = {
         "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
         "deepseek-coder": {"max_tokens": 2500, "temp": 0.5}
     }
+
+    # CLIP model settings for image embeddings
     CLIP_SETTINGS = {
         "model": "openai/clip-vit-large-patch14",
         "image_db": "image_vectors"
     }

+# Ensure required API keys are configured
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
         """**Research Portal Configuration Required**
@@ -230,16 +248,24 @@ class QuantumDocumentManager:
 # Extended Quantum Document Manager for Multi-Modal Documents
 # ------------------------------
 class ExtendedQuantumDocumentManager(QuantumDocumentManager):
-    """Extended with multi-modal document handling."""
-    def create_image_collection(self, image_paths: List[str]):
+    """
+    Extends QuantumDocumentManager with multi-modal (image) document handling.
+    Uses dependency injection for CLIP components.
+    """
+    def __init__(self, clip_model: Any, clip_processor: Any) -> None:
+        super().__init__()
+        self.clip_model = clip_model
+        self.clip_processor = clip_processor
+
+    def create_image_collection(self, image_paths: List[str]) -> Optional[Chroma]:
         embeddings = []
         valid_images = []
         for img_path in image_paths:
             try:
                 image = Image.open(img_path)
-                inputs = clip_processor(images=image, return_tensors="pt")
+                inputs = self.clip_processor(images=image, return_tensors="pt")
                 with torch.no_grad():
-                    emb = clip_model.get_image_features(**inputs)
+                    emb = self.clip_model.get_image_features(**inputs)
                 embeddings.append(emb.numpy())
                 valid_images.append(img_path)
             except FileNotFoundError:
@@ -256,7 +282,7 @@ class ExtendedQuantumDocumentManager(QuantumDocumentManager):
         )

 # Initialize document collections
-qdm = ExtendedQuantumDocumentManager()
+qdm = ExtendedQuantumDocumentManager(clip_model=None, clip_processor=None)  # clip_model/processor to be set later
 research_docs = qdm.create_collection([
     "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
     "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
@@ -273,7 +299,7 @@ development_docs = qdm.create_collection([
 # ------------------------------
 class ResearchRetriever:
     """
-    Provides retrieval methods for different domains.
+    Provides retrieval methods for research and development domains.
     """
     def __init__(self) -> None:
         try:
@@ -311,9 +337,7 @@ class CognitiveProcessor:
         self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]

     def process_query(self, prompt: str) -> Dict:
-        futures = []
-        for _ in range(3):
-            futures.append(self.executor.submit(self._execute_api_request, prompt))
+        futures = [self.executor.submit(self._execute_api_request, prompt) for _ in range(3)]
         results = []
         for future in as_completed(futures):
             try:
@@ -333,7 +357,7 @@ class CognitiveProcessor:
             "model": "deepseek-chat",
             "messages": [{
                 "role": "user",
-                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"  # added 'technical writer'
+                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"
             }],
             "temperature": 0.7,
             "max_tokens": 1500,
@@ -358,6 +382,7 @@ class CognitiveProcessor:
         if not valid_results:
             logger.error("All API requests failed.")
             return {"error": "All API requests failed"}
+        # Choose the result with the longest response content as a simple consensus metric
         return max(valid_results, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))

 # ------------------------------
@@ -365,7 +390,7 @@ class CognitiveProcessor:
 # ------------------------------
 class EnhancedCognitiveProcessor(CognitiveProcessor):
     """
-    Extended with ensemble processing and knowledge graph integration.
+    Extends CognitiveProcessor with ensemble processing and knowledge graph integration.
     """
     def __init__(self) -> None:
         super().__init__()
@@ -373,9 +398,7 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
         self.ensemble_models = ["deepseek-chat", "deepseek-coder"]

     def process_query(self, prompt: str) -> Dict:
-        futures = []
-        for model in self.ensemble_models:
-            futures.append(self.executor.submit(self._execute_api_request, prompt, model))
+        futures = [self.executor.submit(self._execute_api_request, prompt, model) for model in self.ensemble_models]
         results = []
         for future in as_completed(futures):
             try:
@@ -396,7 +419,7 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
             "model": model,
             "messages": [{
                 "role": "user",
-                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"  # added 'technical writer'
+                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"
             }],
             "temperature": ResearchConfig.ENSEMBLE_MODELS[model]["temp"],
             "max_tokens": ResearchConfig.ENSEMBLE_MODELS[model]["max_tokens"],
@@ -416,10 +439,11 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
             logger.exception(f"API request failed for model {model}.")
             return {"error": str(e)}

-    def _update_knowledge_graph(self, response: Dict):
+    def _update_knowledge_graph(self, response: Dict) -> None:
         content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
         node_id = self.knowledge_graph.create_node({"content": content}, "analysis")
         if self.knowledge_graph.node_counter > 1:
+            # Create a relation between the previous node and the new node
             self.knowledge_graph.create_relation(node_id - 1, node_id, "evolution", strength=0.8)

 # ------------------------------
@@ -428,10 +452,12 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
 from graphviz import Digraph

 class QuantumKnowledgeGraph:
-    """Dynamic knowledge representation system with multi-modal nodes."""
+    """
+    Represents a dynamic, multi-modal knowledge graph.
+    """
     def __init__(self):
-        self.nodes = {}
-        self.relations = []
+        self.nodes: Dict[int, Dict[str, Any]] = {}
+        self.relations: List[Dict[str, Any]] = []
         self.node_counter = 0

     def create_node(self, content: Dict, node_type: str) -> int:
@@ -444,7 +470,7 @@ class QuantumKnowledgeGraph:
         }
         return self.node_counter

-    def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0):
+    def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0) -> None:
         self.relations.append({
             "source": source,
             "target": target,
@@ -453,7 +479,7 @@ class QuantumKnowledgeGraph:
         })
         self.nodes[source]["connections"].append(target)

-    def visualize_graph(self, focus_node: int = None) -> str:
+    def visualize_graph(self, focus_node: Optional[int] = None) -> str:
         dot = Digraph(engine="neato")
         for nid, node in self.nodes.items():
             label = f"{node['type']}\n{self._truncate_content(node['content'])}"
@@ -467,21 +493,25 @@ class QuantumKnowledgeGraph:
     def _truncate_content(self, content: Dict) -> str:
         return json.dumps(content)[:50] + "..."

+# ------------------------------
+# Multi-Modal Retriever
+# ------------------------------
 class MultiModalRetriever:
-    """Enhanced retrieval system with hybrid search capabilities."""
-    def __init__(self, text_retriever, clip_model, clip_processor):
+    """
+    Enhanced retrieval system that integrates text, image, and code snippet search.
+    """
+    def __init__(self, text_retriever: Any, clip_model: Any, clip_processor: Any) -> None:
         self.text_retriever = text_retriever
         self.clip_model = clip_model
         self.clip_processor = clip_processor
         self.code_retriever = create_retriever_tool([], "Code Retriever", "Retriever for code snippets")

     def retrieve(self, query: str, domain: str) -> Dict[str, List]:
-        results = {
+        return {
             "text": self._retrieve_text(query),
             "images": self._retrieve_images(query),
             "code": self._retrieve_code(query)
         }
-        return results

     def _retrieve_text(self, query: str) -> List[Any]:
         return self.text_retriever.invoke(query)
@@ -490,6 +520,7 @@ class MultiModalRetriever:
         inputs = self.clip_processor(text=query, return_tensors="pt")
         with torch.no_grad():
             _ = self.clip_model.get_text_features(**inputs)
+        # Placeholder for image retrieval results
         return ["image_result_1.png", "image_result_2.png"]

     def _retrieve_code(self, query: str) -> List[str]:
@@ -500,7 +531,7 @@
 # ------------------------------
 class ResearchWorkflow:
     """
-    Defines the multi-step research workflow using a state graph.
+    Defines a multi-step research workflow using a state graph.
     """
     def __init__(self) -> None:
         self.processor = EnhancedCognitiveProcessor()
@@ -533,7 +564,12 @@ class ResearchWorkflow:
         try:
             query = state["messages"][-1].content
             domain = state.get("domain", "Biomedical Research")
-            new_context = {"raw_query": query, "domain": domain, "refine_count": 0, "refinement_history": []}
+            new_context = {
+                "raw_query": query,
+                "domain": domain,
+                "refine_count": 0,
+                "refinement_history": []
+            }
             logger.info(f"Query ingested. Domain: {domain}")
             return {
                 "messages": [AIMessage(content="Query ingested successfully")],
@@ -565,7 +601,8 @@ class ResearchWorkflow:

     def analyze_content(self, state: AgentState) -> Dict:
         """
-        Analyzes the retrieved documents. Injects a domain-specific fallback analysis for supported domains.
+        Analyzes the retrieved documents. If a domain-specific fallback is available, it is used;
+        otherwise, the system synthesizes a comprehensive analysis via the cognitive processor.
         """
         try:
             domain = state["context"].get("domain", "Biomedical Research").strip().lower()
@@ -599,8 +636,12 @@ class ResearchWorkflow:
             analysis = state["messages"][-1].content
             validation_prompt = (
                 f"Validate the following research analysis:\n{analysis}\n\n"
-                "Check for:\n1. Technical accuracy\n2. Citation support (are claims backed by evidence?)\n3. Logical consistency\n4. Methodological soundness\n\n"
-                "Respond with 'VALID: [brief justification]' or 'INVALID: [brief justification]'."  # changed prompt
+                "Check for:\n"
+                "1. Technical accuracy\n"
+                "2. Citation support (are claims backed by evidence?)\n"
+                "3. Logical consistency\n"
+                "4. Methodological soundness\n\n"
+                "Respond with 'VALID: [brief justification]' or 'INVALID: [brief justification]'."
             )
             response = self.processor.process_query(validation_prompt)
             logger.info("Output validation completed.")
@@ -626,7 +667,7 @@ class ResearchWorkflow:
                 "You are given the following series of refinement outputs:\n" +
                 "\n---\n".join(refinement_history) +
                 "\n\nSynthesize the above into a final, concise, and high-quality technical analysis report. "
-                "Focus on the key findings and improvements made across the iterations. Do not introduce new ideas; just synthesize the improvements. Ensure the report is well-structured and easy to understand."  # modified prompt
+                "Focus on the key findings and improvements made across the iterations. Do not introduce new ideas; just synthesize the improvements. Ensure the report is well-structured and easy to understand."
             )
             meta_response = self.processor.process_query(meta_prompt)
             logger.info("Meta-refinement completed.")
@@ -637,8 +678,11 @@ class ResearchWorkflow:
         else:
             refinement_prompt = (
                 f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
-                "First, critically evaluate the analysis and identify its weaknesses, such as inaccuracies, unsupported claims, or lack of clarity. Summarize these weaknesses in a short paragraph.\n\n"  # added this
-                "Then, improve the following aspects:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence\n\n"
+                "First, critically evaluate the analysis and identify its weaknesses, such as inaccuracies, unsupported claims, or lack of clarity. Summarize these weaknesses in a short paragraph.\n\n"
+                "Then, improve the following aspects:\n"
+                "1. Technical precision\n"
+                "2. Empirical grounding\n"
+                "3. Theoretical coherence\n\n"
                 "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant, addressing the weaknesses identified."
             )
             response = self.processor.process_query(refinement_prompt)
@@ -755,7 +799,6 @@ class ResearchInterface:
             unsafe_allow_html=True
         )

-
     def _build_sidebar(self) -> None:
         with st.sidebar:
             st.title("🔍 Research Database")
@@ -858,15 +901,24 @@ Potential issues:
 # Multi-Modal Retriever Initialization
 # ------------------------------
 from transformers import CLIPProcessor, CLIPModel
+
+# Load CLIP components
 clip_model = CLIPModel.from_pretrained(ResearchConfig.CLIP_SETTINGS["model"])
 clip_processor = CLIPProcessor.from_pretrained(ResearchConfig.CLIP_SETTINGS["model"])
+
+# Update the ExtendedQuantumDocumentManager with the loaded CLIP components
+qdm.clip_model = clip_model
+qdm.clip_processor = clip_processor
+
 multi_retriever = MultiModalRetriever(retriever.research_retriever, clip_model, clip_processor)

 # ------------------------------
 # Execute the Application
 # ------------------------------
 class ResearchInterfaceExtended(ResearchInterface):
-    """Extended with domain adaptability, collaboration, and graph visualization."""
+    """
+    Extended interface that includes domain adaptability, collaboration features, and graph visualization.
+    """
     def _build_main_interface(self) -> None:
         super()._build_main_interface()
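
Note on the consensus rule: the triple-redundant process_query kept by this commit fans out parallel requests and returns whichever reply is longest. Below is a minimal, self-contained sketch of that heuristic; fake_request and the sample reply strings are invented stand-ins for _execute_api_request and real API responses.

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fake_request(reply: str) -> dict:
        # Invented stand-in for _execute_api_request; mirrors the response
        # shape used in app.py (choices -> message -> content).
        return {"choices": [{"message": {"content": reply}}]}

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(fake_request, r)
                   for r in ("short", "a much longer reply", "mid-size")]
        results = [f.result() for f in as_completed(futures)]

    # Same consensus metric as process_query: keep the longest reply.
    best = max(results, key=lambda x: len(x.get("choices", [{}])[0].get("message", {}).get("content", "")))
    print(best["choices"][0]["message"]["content"])  # -> "a much longer reply"

Length is a crude proxy for answer quality, but it is cheap and deterministic; swapping in a real scoring model would only change the key function passed to max.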