Update app.py
app.py (changed)
@@ -70,16 +70,18 @@ class ResearchConfig:
         "Quantum ML Frontiers"
     }
     ANALYSIS_TEMPLATE = (
-        "…"  (eight original template strings, truncated in the diff view)
+        "Let's think step by step. Synthesize a comprehensive technical report based on the following documents. Focus on identifying the key innovations, empirical results, and potential limitations. Explicitly state any assumptions made during your analysis. The report MUST be valid Markdown, and all mathematical notation MUST be correctly formatted LaTeX (e.g., `E=mc^2`).\n\n"
+        "Documents:\n{context}\n\n"
+        "Respond with the following structure:\n"
+        "# Technical Analysis Report\n\n"
+        "1. **Key Technical Contributions:** (Bullet points highlighting the main innovations)\n"
+        "2. **Novel Methodologies:** (Detailed explanation of the new methods used)\n"
+        "3. **Empirical Results:** (Quantitative results with specific metrics, e.g., accuracy, precision, recall, F1-score. Include confidence intervals where appropriate.)\n"
+        "4. **Potential Applications:** (Real-world applications of the technology)\n"
+        "5. **Limitations and Future Directions:** (Current limitations and suggestions for future research)\n\n"
+        "Format: Markdown with LaTeX mathematical notation where applicable."
     )
-
+
     DOMAIN_FALLBACKS = {
         "biomedical research": """
 # Biomedical Research Analysis
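The rewritten ANALYSIS_TEMPLATE exposes a single `{context}` placeholder. The diff does not show where it is rendered, but a minimal, self-contained sketch of the usual `str.format` pattern looks like the following; the abbreviated TEMPLATE and the document list are invented stand-ins, not code from app.py.

```python
# Minimal sketch, not taken from app.py: rendering a {context}-style template
# before it is sent to the model. TEMPLATE is an abbreviated stand-in for
# ResearchConfig.ANALYSIS_TEMPLATE, and the docs are invented examples.
TEMPLATE = (
    "Documents:\n{context}\n\n"
    "Respond with the following structure:\n"
    "# Technical Analysis Report\n"
)

docs = [
    "Paper A: sparse-attention variant, 92.4% accuracy on benchmark X.",
    "Paper B: quantum kernel method, evaluated only on small datasets.",
]

prompt = TEMPLATE.format(context="\n\n".join(docs))
print(prompt)
```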
@@ -148,11 +150,21 @@ class ResearchConfig:
 """
     }
     DOMAIN_PROMPTS = {
-        "biomedical research": "…"  (original prompt entries truncated in the diff view)
+        "biomedical research": """
+            Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs. For example, discuss how a new drug might impact patient survival rates or how a new diagnostic technique might improve early detection of a disease. Discuss specific clinical studies if available.
+        """,
+        "legal research": """
+            Emphasize legal precedents, case law, and nuanced statutory interpretations. For example, when analyzing a case, identify the key holdings, explain the legal reasoning behind the decision, and compare it to other relevant cases. If a statute is involved, discuss how the court interpreted the statute and whether there are any ambiguities or conflicts with other laws.
+        """,
+        "environmental and energy studies": """
+            Highlight renewable energy technologies, efficiency metrics, and policy implications. Provide specific data points on energy consumption and environmental impact. For instance, compare the energy efficiency of solar panels from different manufacturers, or discuss the impact of a specific environmental regulation on air quality.
+        """,
+        "competitive programming and theoretical computer science": """
+            Focus on algorithmic complexity, innovative proofs, and computational techniques. For example, analyze the time and space complexity of a new algorithm, or explain the key steps in a mathematical proof. Include pseudocode or code snippets where appropriate.
+        """,
+        "social sciences": """
+            Concentrate on economic trends, sociological data, and correlations impacting public policy. For example, analyze the impact of a new social program on poverty rates, or discuss the relationship between education levels and income inequality. Cite specific studies and statistical data to support your claims.
+        """
     }
     ENSEMBLE_MODELS = {
         "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
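The diff enriches the DOMAIN_PROMPTS entries but does not show where they are consumed. As a hedged illustration only (the helper name and the concatenation order are assumptions, not code from app.py), a domain hint is typically prepended to the rendered analysis prompt like this:

```python
# Hypothetical helper, not present in app.py: combine a domain hint with the
# already-rendered analysis prompt. DOMAIN_PROMPTS is abbreviated here.
DOMAIN_PROMPTS = {
    "biomedical research": "Consider clinical trial design, patient outcomes, ...",
    "legal research": "Emphasize legal precedents, case law, ...",
}

def build_domain_prompt(domain: str, rendered_template: str) -> str:
    hint = DOMAIN_PROMPTS.get(domain, "").strip()
    # Fall back to the plain template when no domain-specific guidance exists.
    return f"{hint}\n\n{rendered_template}" if hint else rendered_template

print(build_domain_prompt("legal research", "Documents:\n..."))
```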
@@ -165,9 +177,9 @@ class ResearchConfig:
 
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
-        """**Research Portal Configuration Required**
-        1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
-        2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
+        """**Research Portal Configuration Required**
+        1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
+        2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
         3. Rebuild deployment"""
     )
     st.stop()
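The error path above assumes ResearchConfig.DEEPSEEK_API_KEY has already been read from the Space's secret store. On Hugging Face Spaces, secrets are exposed to the running app as environment variables, so the key is typically loaded with something like the sketch below; where exactly app.py does this is not shown in the diff.

```python
import os

# Sketch: Space secrets arrive as environment variables, so a config class
# usually reads the key this way. The default of "" keeps the st.error branch
# above reachable when the secret has not been configured.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
```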
@@ -321,7 +333,7 @@ class CognitiveProcessor:
             "model": "deepseek-chat",
             "messages": [{
                 "role": "user",
-                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"  # added 'technical writer'
             }],
             "temperature": 0.7,
             "max_tokens": 1500,
@@ -384,7 +396,7 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
             "model": model,
             "messages": [{
                 "role": "user",
-                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"  # added 'technical writer'
             }],
             "temperature": ResearchConfig.ENSEMBLE_MODELS[model]["temp"],
             "max_tokens": ResearchConfig.ENSEMBLE_MODELS[model]["max_tokens"],
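Both hunks only touch the "content" field of the request payload; the code that actually sends it sits outside the diff. For context, here is a self-contained sketch of posting such a payload with `requests`. The endpoint URL and header layout are assumptions based on DeepSeek's OpenAI-compatible API, not values taken from app.py, and the key is a placeholder.

```python
import requests

# Assumed OpenAI-compatible endpoint; verify against the DeepSeek documentation.
API_URL = "https://api.deepseek.com/chat/completions"
API_KEY = "sk-..."  # placeholder only; load the real key from a secret

payload = {
    "model": "deepseek-chat",
    "messages": [{
        "role": "user",
        "content": "Respond as a Senior AI Researcher and Technical Writer:\nSummarize the attached findings.",
    }],
    "temperature": 0.7,
    "max_tokens": 1500,
}

resp = requests.post(
    API_URL,
    headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
    json=payload,
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```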
@@ -421,7 +433,7 @@ class QuantumKnowledgeGraph:
         self.nodes = {}
         self.relations = []
         self.node_counter = 0
-
+
     def create_node(self, content: Dict, node_type: str) -> int:
         self.node_counter += 1
         self.nodes[self.node_counter] = {
@@ -431,7 +443,7 @@ class QuantumKnowledgeGraph:
             "connections": []
         }
         return self.node_counter
-
+
     def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0):
         self.relations.append({
             "source": source,
@@ -440,7 +452,7 @@ class QuantumKnowledgeGraph:
             "strength": strength
         })
         self.nodes[source]["connections"].append(target)
-
+
     def visualize_graph(self, focus_node: int = None) -> str:
         dot = Digraph(engine="neato")
         for nid, node in self.nodes.items():
@@ -451,7 +463,7 @@ class QuantumKnowledgeGraph:
         if focus_node:
             dot.node(str(focus_node), color="red", style="filled")
         return dot.source
-
+
     def _truncate_content(self, content: Dict) -> str:
         return json.dumps(content)[:50] + "..."
 
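The four whitespace-only hunks above happen to display the whole public surface of QuantumKnowledgeGraph. A short usage sketch based solely on those signatures follows; the import assumes app.py is importable as a module, and the node contents are invented.

```python
from app import QuantumKnowledgeGraph  # assumption: app.py is importable as a module

graph = QuantumKnowledgeGraph()

# Invented example nodes and relation, using the signatures shown above.
paper = graph.create_node({"title": "Quantum ML Frontiers"}, node_type="paper")
method = graph.create_node({"name": "variational circuit"}, node_type="method")
graph.create_relation(paper, method, rel_type="introduces", strength=0.9)

dot_source = graph.visualize_graph(focus_node=paper)
print(dot_source)  # DOT text, e.g. renderable with st.graphviz_chart(dot_source)
```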
@@ -462,7 +474,7 @@ class MultiModalRetriever:
         self.clip_model = clip_model
         self.clip_processor = clip_processor
         self.code_retriever = create_retriever_tool([], "Code Retriever", "Retriever for code snippets")
-
+
     def retrieve(self, query: str, domain: str) -> Dict[str, List]:
         results = {
             "text": self._retrieve_text(query),
@@ -470,16 +482,16 @@ class MultiModalRetriever:
             "code": self._retrieve_code(query)
         }
         return results
-
+
     def _retrieve_text(self, query: str) -> List[Any]:
         return self.text_retriever.invoke(query)
-
+
     def _retrieve_images(self, query: str) -> List[str]:
         inputs = self.clip_processor(text=query, return_tensors="pt")
         with torch.no_grad():
             _ = self.clip_model.get_text_features(**inputs)
         return ["image_result_1.png", "image_result_2.png"]
-
+
     def _retrieve_code(self, query: str) -> List[str]:
         return self.code_retriever.invoke(query)
 
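For orientation, retrieve() returns one list per modality. The stub below only mirrors that shape without loading CLIP or the retrievers; the "images" entry is an assumption based on _retrieve_images above, since the dictionary line that wires it up falls between the two hunks and is not shown.

```python
from typing import Any, Dict, List

# Structural stub only: mimics the result shape of MultiModalRetriever.retrieve()
# so downstream handling can be reasoned about without the real dependencies.
def retrieve_stub(query: str, domain: str) -> Dict[str, List[Any]]:
    return {
        "text": [f"text hit for {query!r}"],          # _retrieve_text(query)
        "images": ["image_result_1.png"],             # _retrieve_images(query), assumed key
        "code": [f"# snippet relevant to {domain}"],  # _retrieve_code(query)
    }

print(retrieve_stub("sparse attention", "competitive programming and theoretical computer science"))
```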
@@ -586,9 +598,9 @@ class ResearchWorkflow:
         try:
             analysis = state["messages"][-1].content
             validation_prompt = (
-                f"Validate research analysis:\n{analysis}\n\n"
-                "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
-                "Respond with 'VALID' or 'INVALID'"
+                f"Validate the following research analysis:\n{analysis}\n\n"
+                "Check for:\n1. Technical accuracy\n2. Citation support (are claims backed by evidence?)\n3. Logical consistency\n4. Methodological soundness\n\n"
+                "Respond with 'VALID: [brief justification]' or 'INVALID: [brief justification]'."  # changed prompt
             )
             response = self.processor.process_query(validation_prompt)
             logger.info("Output validation completed.")
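The new wording asks the model to answer with "VALID: ..." or "INVALID: ...". The downstream check is not part of this diff, so the helper below is only a hedged sketch of how such a reply could be parsed; note that a naive substring test would misfire because "INVALID" contains "VALID".

```python
# Hypothetical helper, not in app.py: split a "VALID: ..." / "INVALID: ..." reply
# into a boolean verdict and its justification.
def parse_validation(reply: str) -> tuple[bool, str]:
    verdict, _, justification = reply.partition(":")
    return verdict.strip().upper() == "VALID", justification.strip()

ok, why = parse_validation("INVALID: empirical claims lack supporting citations.")
print(ok, "->", why)  # False -> empirical claims lack supporting citations.
```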
@@ -608,13 +620,13 @@ class ResearchWorkflow:
             refinement_history.append(current_analysis)
             difficulty_level = max(0, 3 - state["context"]["refine_count"])
             logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
-
+
             if state["context"]["refine_count"] >= 3:
                 meta_prompt = (
                     "You are given the following series of refinement outputs:\n" +
                     "\n---\n".join(refinement_history) +
-                    "\n\…  (line truncated in the diff view)
-                    "Do not introduce new ideas; just synthesize the improvements."
+                    "\n\nSynthesize the above into a final, concise, and high-quality technical analysis report. "
+                    "Focus on the key findings and improvements made across the iterations. Do not introduce new ideas; just synthesize the improvements. Ensure the report is well-structured and easy to understand."  # modified prompt
                 )
                 meta_response = self.processor.process_query(meta_prompt)
                 logger.info("Meta-refinement completed.")
@@ -625,8 +637,9 @@ class ResearchWorkflow:
             else:
                 refinement_prompt = (
                     f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
-                    "…"  (two original prompt lines, truncated in the diff view)
+                    "First, critically evaluate the analysis and identify its weaknesses, such as inaccuracies, unsupported claims, or lack of clarity. Summarize these weaknesses in a short paragraph.\n\n"  # added this
+                    "Then, improve the following aspects:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence\n\n"
+                    "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant, addressing the weaknesses identified."
                 )
                 response = self.processor.process_query(refinement_prompt)
                 logger.info("Refinement completed.")
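Taken together, the two hunks above implement a LADDER-style schedule: up to three ordinary refinement passes at a decreasing difficulty level, then one meta-pass that synthesizes the accumulated history. The standalone sketch below reproduces that control flow with the model call stubbed out; in app.py the counter lives in the workflow state rather than a local loop.

```python
# Standalone sketch of the refinement gating shown above; process_query is a stub.
def process_query(prompt: str) -> str:
    return f"<model reply to a {len(prompt)}-character prompt>"

refinement_history: list[str] = []
current_analysis = "Initial draft analysis."

for refine_count in range(5):
    refinement_history.append(current_analysis)
    difficulty_level = max(0, 3 - refine_count)  # 3, 2, 1, then clamped at 0
    if refine_count >= 3:
        # Meta-refinement: synthesize all earlier passes into one final report.
        current_analysis = process_query(
            "You are given the following series of refinement outputs:\n"
            + "\n---\n".join(refinement_history)
            + "\n\nSynthesize the above into a final, concise report."
        )
        break
    # Ordinary pass: request a simpler yet more accurate variant at this level.
    current_analysis = process_query(
        f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}"
    )

print(current_analysis)
```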
@@ -655,7 +668,7 @@ class ResearchWorkflow:
                 "context": {"error": True},
                 "metadata": {"status": "error"}
             }
-
+
     def enhance_analysis(self, state: AgentState) -> Dict:
         try:
             analysis = state["messages"][-1].content
@@ -742,6 +755,7 @@ class ResearchInterface:
             unsafe_allow_html=True
         )
 
+
     def _build_sidebar(self) -> None:
         with st.sidebar:
             st.title("🔍 Research Database")