mgbam committed on
Commit d179217 · verified · 1 Parent(s): 9a46c55

Update app.py

Files changed (1)
  1. app.py +50 -36
app.py CHANGED
@@ -70,16 +70,18 @@ class ResearchConfig:
         "Quantum ML Frontiers"
     }
     ANALYSIS_TEMPLATE = (
-        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
-        "Respond with:\n"
-        "1. Key Technical Contributions (bullet points)\n"
-        "2. Novel Methodologies\n"
-        "3. Empirical Results (with metrics)\n"
-        "4. Potential Applications\n"
-        "5. Limitations & Future Directions\n\n"
-        "Format: Markdown with LaTeX mathematical notation where applicable"
+        "Let's think step by step. Synthesize a comprehensive technical report based on the following documents. Focus on identifying the key innovations, empirical results, and potential limitations. Explicitly state any assumptions made during your analysis. The report MUST be valid Markdown, and all mathematical notation MUST be correctly formatted LaTeX (e.g., `E=mc^2`).\n\n"
+        "Documents:\n{context}\n\n"
+        "Respond with the following structure:\n"
+        "# Technical Analysis Report\n\n"
+        "1. **Key Technical Contributions:** (Bullet points highlighting the main innovations)\n"
+        "2. **Novel Methodologies:** (Detailed explanation of the new methods used)\n"
+        "3. **Empirical Results:** (Quantitative results with specific metrics, e.g., accuracy, precision, recall, F1-score. Include confidence intervals where appropriate.)\n"
+        "4. **Potential Applications:** (Real-world applications of the technology)\n"
+        "5. **Limitations and Future Directions:** (Current limitations and suggestions for future research)\n\n"
+        "Format: Markdown with LaTeX mathematical notation where applicable."
     )
-    # Fallback analyses using lowercase keys for robust matching
+
     DOMAIN_FALLBACKS = {
         "biomedical research": """
         # Biomedical Research Analysis
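Note: the expanded template still exposes only the `{context}` placeholder, so the call site that fills it should not need to change. A minimal sketch of that interpolation (variable names here are illustrative, not taken from app.py):

```python
# Hypothetical illustration: interpolating retrieved document text into the
# revised ANALYSIS_TEMPLATE. Only {context} exists as a placeholder, so a single
# keyword argument to str.format is sufficient.
docs = ["Excerpt from paper A ...", "Excerpt from paper B ..."]  # placeholder excerpts
prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context="\n\n".join(docs))
```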
@@ -148,11 +150,21 @@ class ResearchConfig:
         """
     }
     DOMAIN_PROMPTS = {
-        "biomedical research": "Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs.",
-        "legal research": "Emphasize legal precedents, case law, and nuanced statutory interpretations.",
-        "environmental and energy studies": "Highlight renewable energy technologies, efficiency metrics, and policy implications.",
-        "competitive programming and theoretical computer science": "Focus on algorithmic complexity, innovative proofs, and computational techniques.",
-        "social sciences": "Concentrate on economic trends, sociological data, and correlations impacting public policy."
+        "biomedical research": """
+            Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs. For example, discuss how a new drug might impact patient survival rates or how a new diagnostic technique might improve early detection of a disease. Discuss specific clinical studies if available.
+        """,
+        "legal research": """
+            Emphasize legal precedents, case law, and nuanced statutory interpretations. For example, when analyzing a case, identify the key holdings, explain the legal reasoning behind the decision, and compare it to other relevant cases. If a statute is involved, discuss how the court interpreted the statute and whether there are any ambiguities or conflicts with other laws.
+        """,
+        "environmental and energy studies": """
+            Highlight renewable energy technologies, efficiency metrics, and policy implications. Provide specific data points on energy consumption and environmental impact. For instance, compare the energy efficiency of solar panels from different manufacturers, or discuss the impact of a specific environmental regulation on air quality.
+        """,
+        "competitive programming and theoretical computer science": """
+            Focus on algorithmic complexity, innovative proofs, and computational techniques. For example, analyze the time and space complexity of a new algorithm, or explain the key steps in a mathematical proof. Include pseudocode or code snippets where appropriate.
+        """,
+        "social sciences": """
+            Concentrate on economic trends, sociological data, and correlations impacting public policy. For example, analyze the impact of a new social program on poverty rates, or discuss the relationship between education levels and income inequality. Cite specific studies and statistical data to support your claims.
+        """
     }
     ENSEMBLE_MODELS = {
         "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
@@ -165,9 +177,9 @@ class ResearchConfig:
 
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
-        """**Research Portal Configuration Required**
-        1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
-        2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
+        """**Research Portal Configuration Required**
+        1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
+        2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
         3. Rebuild deployment"""
     )
     st.stop()
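For local runs outside the Space, the same guard can be satisfied by exporting the secret before launching Streamlit. This assumes the config reads the key from the process environment, which this hunk does not show:

```python
# Hypothetical local check (assumes DEEPSEEK_API_KEY is read from the environment;
# on Hugging Face Spaces the value should come from the Space secret instead).
import os

if not os.getenv("DEEPSEEK_API_KEY"):
    raise RuntimeError("DEEPSEEK_API_KEY is not set; export it before running the app")
```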
@@ -321,7 +333,7 @@ class CognitiveProcessor:
             "model": "deepseek-chat",
             "messages": [{
                 "role": "user",
-                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}" # added 'technical writer'
             }],
             "temperature": 0.7,
             "max_tokens": 1500,
@@ -384,7 +396,7 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
             "model": model,
             "messages": [{
                 "role": "user",
-                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}" # added 'technical writer'
             }],
             "temperature": ResearchConfig.ENSEMBLE_MODELS[model]["temp"],
             "max_tokens": ResearchConfig.ENSEMBLE_MODELS[model]["max_tokens"],
@@ -421,7 +433,7 @@ class QuantumKnowledgeGraph:
         self.nodes = {}
         self.relations = []
         self.node_counter = 0
-
+
     def create_node(self, content: Dict, node_type: str) -> int:
         self.node_counter += 1
         self.nodes[self.node_counter] = {
@@ -431,7 +443,7 @@
             "connections": []
         }
         return self.node_counter
-
+
     def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0):
         self.relations.append({
             "source": source,
@@ -440,7 +452,7 @@
             "strength": strength
         })
         self.nodes[source]["connections"].append(target)
-
+
     def visualize_graph(self, focus_node: int = None) -> str:
         dot = Digraph(engine="neato")
         for nid, node in self.nodes.items():
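The QuantumKnowledgeGraph hunks here only remove trailing whitespace between methods. For reference, the methods they touch compose roughly as follows; the node payloads are invented for illustration:

```python
# Hypothetical usage of the methods shown in these hunks; the node contents are made up.
kg = QuantumKnowledgeGraph()
paper = kg.create_node({"title": "Quantum ML Frontiers"}, "paper")
method = kg.create_node({"name": "variational quantum circuit"}, "method")
kg.create_relation(paper, method, "uses", strength=0.9)
dot_source = kg.visualize_graph(focus_node=paper)  # Graphviz DOT text for rendering
```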
@@ -451,7 +463,7 @@
         if focus_node:
             dot.node(str(focus_node), color="red", style="filled")
         return dot.source
-
+
     def _truncate_content(self, content: Dict) -> str:
         return json.dumps(content)[:50] + "..."
 
@@ -462,7 +474,7 @@ class MultiModalRetriever:
         self.clip_model = clip_model
         self.clip_processor = clip_processor
         self.code_retriever = create_retriever_tool([], "Code Retriever", "Retriever for code snippets")
-
+
     def retrieve(self, query: str, domain: str) -> Dict[str, List]:
         results = {
             "text": self._retrieve_text(query),
@@ -470,16 +482,16 @@
             "code": self._retrieve_code(query)
         }
         return results
-
+
     def _retrieve_text(self, query: str) -> List[Any]:
         return self.text_retriever.invoke(query)
-
+
     def _retrieve_images(self, query: str) -> List[str]:
         inputs = self.clip_processor(text=query, return_tensors="pt")
         with torch.no_grad():
             _ = self.clip_model.get_text_features(**inputs)
         return ["image_result_1.png", "image_result_2.png"]
-
+
     def _retrieve_code(self, query: str) -> List[str]:
         return self.code_retriever.invoke(query)
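These whitespace-only hunks cover MultiModalRetriever, which fans a query out per modality. A hedged usage sketch; the constructor arguments and the "images" key are inferred from the attributes and helpers shown above, not from code in this diff:

```python
# Hypothetical usage; constructor arguments are assumed from the attribute names above.
retriever = MultiModalRetriever(text_retriever, clip_model, clip_processor)
results = retriever.retrieve("graph neural networks for drug discovery", "biomedical research")
text_hits = results["text"]             # documents from the text retriever
code_hits = results["code"]             # snippets from the code retriever tool
image_hits = results.get("images", [])  # placeholder image results from the CLIP branch
```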
 
@@ -586,9 +598,9 @@ class ResearchWorkflow:
         try:
             analysis = state["messages"][-1].content
             validation_prompt = (
-                f"Validate research analysis:\n{analysis}\n\n"
-                "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
-                "Respond with 'VALID' or 'INVALID'"
+                f"Validate the following research analysis:\n{analysis}\n\n"
+                "Check for:\n1. Technical accuracy\n2. Citation support (are claims backed by evidence?)\n3. Logical consistency\n4. Methodological soundness\n\n"
+                "Respond with 'VALID: [brief justification]' or 'INVALID: [brief justification]'." # changed prompt
             )
             response = self.processor.process_query(validation_prompt)
             logger.info("Output validation completed.")
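Because the model is now asked for "VALID: ..." / "INVALID: ..." rather than a bare keyword, any downstream check should match the prefix; a plain substring test would treat "INVALID" as containing "VALID". A minimal parsing sketch, assuming the validator's reply is available as a string (this diff does not show how it is extracted):

```python
# Hypothetical parsing of the new verdict format; `verdict` stands in for the model's reply text.
verdict = "INVALID: the empirical claims lack citation support"
is_valid = verdict.strip().upper().startswith("VALID")   # prefix test, not substring
justification = verdict.split(":", 1)[1].strip() if ":" in verdict else ""
```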
@@ -608,13 +620,13 @@
             refinement_history.append(current_analysis)
             difficulty_level = max(0, 3 - state["context"]["refine_count"])
             logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
-
+
             if state["context"]["refine_count"] >= 3:
                 meta_prompt = (
                     "You are given the following series of refinement outputs:\n" +
                     "\n---\n".join(refinement_history) +
-                    "\n\nSummarize the above into a final, concise, and high-quality technical analysis report. "
-                    "Do not introduce new ideas; just synthesize the improvements."
+                    "\n\nSynthesize the above into a final, concise, and high-quality technical analysis report. "
+                    "Focus on the key findings and improvements made across the iterations. Do not introduce new ideas; just synthesize the improvements. Ensure the report is well-structured and easy to understand." # modified prompt
                 )
                 meta_response = self.processor.process_query(meta_prompt)
                 logger.info("Meta-refinement completed.")
@@ -625,8 +637,9 @@
             else:
                 refinement_prompt = (
                     f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
-                    "Improve the following aspects:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence\n\n"
-                    "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant."
+                    "First, critically evaluate the analysis and identify its weaknesses, such as inaccuracies, unsupported claims, or lack of clarity. Summarize these weaknesses in a short paragraph.\n\n" #added this
+                    "Then, improve the following aspects:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence\n\n"
+                    "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant, addressing the weaknesses identified."
                 )
                 response = self.processor.process_query(refinement_prompt)
                 logger.info("Refinement completed.")
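Taken together, the two hunks above imply at most three graded refinement passes followed by a meta-summary over the accumulated history. A compact sketch of that control flow; the function and argument names are illustrative, not from app.py:

```python
# Hypothetical outline of the branch shown above: three graded refinements,
# then a meta-summary over the collected refinement_history.
def plan_refinement_step(refine_count: int) -> str:
    difficulty_level = max(0, 3 - refine_count)  # 3, 2, 1, then 0
    if refine_count >= 3:
        return "meta-summarize refinement_history into the final report"
    return f"critique and refine the analysis at difficulty level {difficulty_level}"
```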
@@ -655,7 +668,7 @@ class ResearchWorkflow:
655
  "context": {"error": True},
656
  "metadata": {"status": "error"}
657
  }
658
-
659
  def enhance_analysis(self, state: AgentState) -> Dict:
660
  try:
661
  analysis = state["messages"][-1].content
@@ -742,6 +755,7 @@ class ResearchInterface:
             unsafe_allow_html=True
         )
 
+
     def _build_sidebar(self) -> None:
         with st.sidebar:
             st.title("🔍 Research Database")
 