Update app.py
app.py
CHANGED
@@ -1,8 +1,11 @@
-
-
-
-
-
+"""
+Enhanced NeuroResearch AI System
+---------------------------------
+This application integrates domain-adaptive multi-modal retrieval, ensemble cognitive processing,
+and dynamic knowledge graph construction. It is designed for advanced technical research,
+analysis, and reporting, employing triple-redundant API requests and a structured state workflow.
+"""
+
 import logging
 import os
 import re
@@ -31,7 +34,7 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from langchain.tools.retriever import create_retriever_tool
 
-# Increase Python's recursion limit
+# Increase Python's recursion limit if needed
 sys.setrecursionlimit(1000)
 
 # ------------------------------
@@ -52,15 +55,20 @@ class AgentState(TypedDict):
     metadata: Dict[str, Any]
 
 # ------------------------------
-# Configuration
+# Application Configuration
 # ------------------------------
 class ResearchConfig:
+    # Environment & API configuration
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
+
+    # Document processing settings
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
+
+    # Mapping of documents to research topics
     DOCUMENT_MAP = {
         "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
             "CV-Transformer Hybrid Architecture",
@@ -69,19 +77,24 @@ class ResearchConfig:
         "Latest Trends in Machine Learning Methods Using Quantum Computing":
             "Quantum ML Frontiers"
     }
+
+    # Template for detailed analysis using Markdown and LaTeX formatting
     ANALYSIS_TEMPLATE = (
-        "Let's think step by step. Synthesize a comprehensive technical report based on the following documents.
+        "Let's think step by step. Synthesize a comprehensive technical report based on the following documents. "
+        "Focus on identifying the key innovations, empirical results, and potential limitations. Explicitly state any assumptions made during your analysis. "
+        "The report MUST be valid Markdown, and all mathematical notation MUST be correctly formatted LaTeX (e.g., `E=mc^2`).\n\n"
         "Documents:\n{context}\n\n"
         "Respond with the following structure:\n"
        "# Technical Analysis Report\n\n"
-        "1.
-        "2.
-        "3.
-        "4.
-        "5.
+        "1. **Key Technical Contributions:** (Bullet points highlighting the main innovations)\n"
+        "2. **Novel Methodologies:** (Detailed explanation of the new methods used)\n"
+        "3. **Empirical Results:** (Quantitative results with specific metrics, e.g., accuracy, precision, recall, F1-score. Include confidence intervals where appropriate.)\n"
+        "4. **Potential Applications:** (Real-world applications of the technology)\n"
+        "5. **Limitations and Future Directions:** (Current limitations and suggestions for future research)\n\n"
         "Format: Markdown with LaTeX mathematical notation where applicable."
     )
-
+
+    # Domain-specific fallback analyses and prompts
     DOMAIN_FALLBACKS = {
         "biomedical research": """
 # Biomedical Research Analysis
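The template keeps a single `{context}` placeholder, so it is presumably filled with `str.format` before being sent to the model. A minimal sketch under that assumption (the shortened template and sample documents below are illustrative stand-ins, not the app's actual data):

    # Sketch: filling an analysis template that carries a single {context} placeholder.
    ANALYSIS_TEMPLATE = (
        "Let's think step by step. Synthesize a comprehensive technical report "
        "based on the following documents.\n\n"
        "Documents:\n{context}\n\n"
        "Respond with the following structure:\n"
        "# Technical Analysis Report\n"
    )

    docs = [
        "Research Report: a CV-Transformer hybrid improves image recognition accuracy.",
        "Survey: quantum computing trends in machine learning methods.",
    ]
    prompt = ANALYSIS_TEMPLATE.format(context="\n\n".join(docs))
    print(prompt)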
@@ -151,30 +164,35 @@ class ResearchConfig:
     }
     DOMAIN_PROMPTS = {
         "biomedical research": """
-
-
+        Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs. For example, discuss how a new drug might impact patient survival rates or how a new diagnostic technique might improve early detection of a disease. Discuss specific clinical studies if available.
+        """,
         "legal research": """
-
-
+        Emphasize legal precedents, case law, and nuanced statutory interpretations. For example, when analyzing a case, identify the key holdings, explain the legal reasoning behind the decision, and compare it to other relevant cases. If a statute is involved, discuss how the court interpreted the statute and whether there are any ambiguities or conflicts with other laws.
+        """,
         "environmental and energy studies": """
-
-
+        Highlight renewable energy technologies, efficiency metrics, and policy implications. Provide specific data points on energy consumption and environmental impact. For instance, compare the energy efficiency of solar panels from different manufacturers, or discuss the impact of a specific environmental regulation on air quality.
+        """,
         "competitive programming and theoretical computer science": """
-
-
+        Focus on algorithmic complexity, innovative proofs, and computational techniques. For example, analyze the time and space complexity of a new algorithm, or explain the key steps in a mathematical proof. Include pseudocode or code snippets where appropriate.
+        """,
         "social sciences": """
-
-
+        Concentrate on economic trends, sociological data, and correlations impacting public policy. For example, analyze the impact of a new social program on poverty rates, or discuss the relationship between education levels and income inequality. Cite specific studies and statistical data to support your claims.
+        """
     }
+
+    # Ensemble model settings
     ENSEMBLE_MODELS = {
         "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
         "deepseek-coder": {"max_tokens": 2500, "temp": 0.5}
     }
+
+    # CLIP model settings for image embeddings
     CLIP_SETTINGS = {
         "model": "openai/clip-vit-large-patch14",
         "image_db": "image_vectors"
     }
 
+# Ensure required API keys are configured
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
         """**Research Portal Configuration Required**
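Elsewhere in the diff the selected domain is normalized with `.strip().lower()` before lookup, so the lowercase keys above are matched case-insensitively. A small sketch of that lookup with a fallback (the prompt texts are abbreviated stand-ins for the full config strings):

    # Sketch: normalizing a user-selected domain and fetching its prompt fragment.
    DOMAIN_PROMPTS = {
        "biomedical research": "Consider clinical trial design and patient outcomes.",
        "legal research": "Emphasize legal precedents and statutory interpretation.",
        "social sciences": "Concentrate on economic trends and sociological data.",
    }

    def domain_prompt(domain: str) -> str:
        key = domain.strip().lower()
        # Fall back to an empty fragment for domains without a tailored prompt.
        return DOMAIN_PROMPTS.get(key, "")

    print(domain_prompt("  Biomedical Research "))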
@@ -230,16 +248,24 @@ class QuantumDocumentManager:
 # Extended Quantum Document Manager for Multi-Modal Documents
 # ------------------------------
 class ExtendedQuantumDocumentManager(QuantumDocumentManager):
-    """
-
+    """
+    Extends QuantumDocumentManager with multi-modal (image) document handling.
+    Uses dependency injection for CLIP components.
+    """
+    def __init__(self, clip_model: Any, clip_processor: Any) -> None:
+        super().__init__()
+        self.clip_model = clip_model
+        self.clip_processor = clip_processor
+
+    def create_image_collection(self, image_paths: List[str]) -> Optional[Chroma]:
         embeddings = []
         valid_images = []
         for img_path in image_paths:
             try:
                 image = Image.open(img_path)
-                inputs = clip_processor(images=image, return_tensors="pt")
+                inputs = self.clip_processor(images=image, return_tensors="pt")
                 with torch.no_grad():
-                    emb = clip_model.get_image_features(**inputs)
+                    emb = self.clip_model.get_image_features(**inputs)
                 embeddings.append(emb.numpy())
                 valid_images.append(img_path)
             except FileNotFoundError:
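The constructor change replaces module-level CLIP globals with injected components, and the call site below constructs the manager before the models are loaded and attaches them afterwards. A self-contained sketch of that two-phase wiring (all class and attribute names here are illustrative stand-ins, not the app's real objects):

    from typing import Any, List, Optional

    class ImageManagerSketch:
        """Stand-in for a manager that receives its encoder via injection."""
        def __init__(self, encoder: Optional[Any] = None, processor: Optional[Any] = None) -> None:
            self.encoder = encoder
            self.processor = processor

        def embed(self, image_paths: List[str]) -> List[Any]:
            if self.encoder is None or self.processor is None:
                raise RuntimeError("CLIP components have not been attached yet")
            return []  # real embedding work would go here

    # Two-phase wiring, mirroring the diff: construct first, attach components later.
    manager = ImageManagerSketch(encoder=None, processor=None)
    manager.encoder = object()    # stands in for the loaded CLIP model
    manager.processor = object()  # stands in for the loaded CLIP processor

Constructing the manager only after the CLIP components are loaded would avoid the window in which the attributes are still None.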
@@ -256,7 +282,7 @@ class ExtendedQuantumDocumentManager(QuantumDocumentManager):
         )
 
 # Initialize document collections
-qdm = ExtendedQuantumDocumentManager()
+qdm = ExtendedQuantumDocumentManager(clip_model=None, clip_processor=None)  # clip_model/processor to be set later
 research_docs = qdm.create_collection([
     "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
     "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
@@ -273,7 +299,7 @@ development_docs = qdm.create_collection([
 # ------------------------------
 class ResearchRetriever:
     """
-    Provides retrieval methods for
+    Provides retrieval methods for research and development domains.
     """
     def __init__(self) -> None:
         try:
@@ -311,9 +337,7 @@ class CognitiveProcessor:
         self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
 
     def process_query(self, prompt: str) -> Dict:
-        futures = []
-        for _ in range(3):
-            futures.append(self.executor.submit(self._execute_api_request, prompt))
+        futures = [self.executor.submit(self._execute_api_request, prompt) for _ in range(3)]
         results = []
         for future in as_completed(futures):
             try:
@@ -333,7 +357,7 @@ class CognitiveProcessor:
             "model": "deepseek-chat",
             "messages": [{
                 "role": "user",
-                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"
+                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"
             }],
             "temperature": 0.7,
             "max_tokens": 1500,
@@ -358,6 +382,7 @@ class CognitiveProcessor:
         if not valid_results:
             logger.error("All API requests failed.")
             return {"error": "All API requests failed"}
+        # Choose the result with the longest response content as a simple consensus metric
         return max(valid_results, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
 
 # ------------------------------
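Both processors now build their futures with a comprehension and keep the original selection rule: issue several redundant requests, collect whatever completes, and keep the longest response. A runnable sketch of that pattern with a stubbed request function (the stub stands in for the real API call):

    import random
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from typing import Dict

    def fake_api_request(prompt: str) -> Dict:
        """Stand-in for the real API call; returns an OpenAI-style payload."""
        text = prompt + " " + "x" * random.randint(1, 20)
        return {"choices": [{"message": {"content": text}}]}

    executor = ThreadPoolExecutor(max_workers=3)
    futures = [executor.submit(fake_api_request, "analysis draft") for _ in range(3)]

    results = []
    for future in as_completed(futures):
        results.append(future.result())

    # Same consensus rule as the diff: prefer the longest returned content.
    best = max(results, key=lambda r: len(r["choices"][0]["message"]["content"]))
    print(len(best["choices"][0]["message"]["content"]))
    executor.shutdown()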
@@ -365,7 +390,7 @@ class CognitiveProcessor:
 # ------------------------------
 class EnhancedCognitiveProcessor(CognitiveProcessor):
     """
-
+    Extends CognitiveProcessor with ensemble processing and knowledge graph integration.
     """
     def __init__(self) -> None:
         super().__init__()
@@ -373,9 +398,7 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
         self.ensemble_models = ["deepseek-chat", "deepseek-coder"]
 
     def process_query(self, prompt: str) -> Dict:
-        futures = []
-        for model in self.ensemble_models:
-            futures.append(self.executor.submit(self._execute_api_request, prompt, model))
+        futures = [self.executor.submit(self._execute_api_request, prompt, model) for model in self.ensemble_models]
         results = []
         for future in as_completed(futures):
             try:
@@ -396,7 +419,7 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
             "model": model,
             "messages": [{
                 "role": "user",
-                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"
+                "content": f"Respond as a Senior AI Researcher and Technical Writer:\n{prompt}"
             }],
             "temperature": ResearchConfig.ENSEMBLE_MODELS[model]["temp"],
             "max_tokens": ResearchConfig.ENSEMBLE_MODELS[model]["max_tokens"],
@@ -416,10 +439,11 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
             logger.exception(f"API request failed for model {model}.")
             return {"error": str(e)}
 
-    def _update_knowledge_graph(self, response: Dict):
+    def _update_knowledge_graph(self, response: Dict) -> None:
         content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
         node_id = self.knowledge_graph.create_node({"content": content}, "analysis")
         if self.knowledge_graph.node_counter > 1:
+            # Create a relation between the previous node and the new node
             self.knowledge_graph.create_relation(node_id - 1, node_id, "evolution", strength=0.8)
 
 # ------------------------------
@@ -428,10 +452,12 @@ class EnhancedCognitiveProcessor(CognitiveProcessor):
 from graphviz import Digraph
 
 class QuantumKnowledgeGraph:
-    """
+    """
+    Represents a dynamic, multi-modal knowledge graph.
+    """
     def __init__(self):
-        self.nodes = {}
-        self.relations = []
+        self.nodes: Dict[int, Dict[str, Any]] = {}
+        self.relations: List[Dict[str, Any]] = []
         self.node_counter = 0
 
     def create_node(self, content: Dict, node_type: str) -> int:
@@ -444,7 +470,7 @@ class QuantumKnowledgeGraph:
         }
         return self.node_counter
 
-    def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0):
+    def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0) -> None:
         self.relations.append({
             "source": source,
             "target": target,
@@ -453,7 +479,7 @@
         })
         self.nodes[source]["connections"].append(target)
 
-    def visualize_graph(self, focus_node: int = None) -> str:
+    def visualize_graph(self, focus_node: Optional[int] = None) -> str:
         dot = Digraph(engine="neato")
         for nid, node in self.nodes.items():
             label = f"{node['type']}\n{self._truncate_content(node['content'])}"
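Taken together, the graph hunks describe integer node ids, a relation list, and an "evolution" edge linking consecutive analysis nodes. A condensed, self-contained sketch of that usage (a trimmed re-statement of the interface shown in the hunks, not the full implementation):

    from typing import Any, Dict, List

    class KnowledgeGraphSketch:
        """Trimmed version of the graph interface shown in the diff."""
        def __init__(self) -> None:
            self.nodes: Dict[int, Dict[str, Any]] = {}
            self.relations: List[Dict[str, Any]] = []
            self.node_counter = 0

        def create_node(self, content: Dict, node_type: str) -> int:
            self.node_counter += 1
            self.nodes[self.node_counter] = {
                "content": content,
                "type": node_type,
                "connections": [],
            }
            return self.node_counter

        def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0) -> None:
            self.relations.append({"source": source, "target": target,
                                   "type": rel_type, "strength": strength})
            self.nodes[source]["connections"].append(target)

    graph = KnowledgeGraphSketch()
    first = graph.create_node({"content": "initial analysis"}, "analysis")
    second = graph.create_node({"content": "refined analysis"}, "analysis")
    # Link consecutive analyses, as _update_knowledge_graph does in the diff.
    graph.create_relation(first, second, "evolution", strength=0.8)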
@@ -467,21 +493,25 @@
     def _truncate_content(self, content: Dict) -> str:
         return json.dumps(content)[:50] + "..."
 
+# ------------------------------
+# Multi-Modal Retriever
+# ------------------------------
 class MultiModalRetriever:
-    """
-
+    """
+    Enhanced retrieval system that integrates text, image, and code snippet search.
+    """
+    def __init__(self, text_retriever: Any, clip_model: Any, clip_processor: Any) -> None:
         self.text_retriever = text_retriever
         self.clip_model = clip_model
         self.clip_processor = clip_processor
         self.code_retriever = create_retriever_tool([], "Code Retriever", "Retriever for code snippets")
 
     def retrieve(self, query: str, domain: str) -> Dict[str, List]:
-
+        return {
             "text": self._retrieve_text(query),
             "images": self._retrieve_images(query),
             "code": self._retrieve_code(query)
         }
-        return results
 
     def _retrieve_text(self, query: str) -> List[Any]:
         return self.text_retriever.invoke(query)
@@ -490,6 +520,7 @@ class MultiModalRetriever:
         inputs = self.clip_processor(text=query, return_tensors="pt")
         with torch.no_grad():
             _ = self.clip_model.get_text_features(**inputs)
+        # Placeholder for image retrieval results
         return ["image_result_1.png", "image_result_2.png"]
 
     def _retrieve_code(self, query: str) -> List[str]:
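`_retrieve_images` currently encodes the query but still returns fixed placeholders. If real image retrieval were wired in, one common approach is cosine similarity between CLIP text and image features; a hedged sketch of that scoring step (not the app's actual behaviour; it assumes the model weights are downloadable and uses a blank PIL image as a stand-in for a stored one):

    import torch
    from PIL import Image
    from transformers import CLIPModel, CLIPProcessor

    model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

    query = "transformer architecture diagram"
    image = Image.new("RGB", (224, 224))  # stand-in for a stored image

    with torch.no_grad():
        text_emb = model.get_text_features(**processor(text=query, return_tensors="pt"))
        image_emb = model.get_image_features(**processor(images=image, return_tensors="pt"))

    # Rank candidate images by cosine similarity to the query embedding.
    score = torch.nn.functional.cosine_similarity(text_emb, image_emb).item()
    print(f"similarity: {score:.3f}")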
@@ -500,7 +531,7 @@
 # ------------------------------
 class ResearchWorkflow:
     """
-    Defines
+    Defines a multi-step research workflow using a state graph.
     """
     def __init__(self) -> None:
         self.processor = EnhancedCognitiveProcessor()
@@ -533,7 +564,12 @@ class ResearchWorkflow:
         try:
             query = state["messages"][-1].content
             domain = state.get("domain", "Biomedical Research")
-            new_context = {
+            new_context = {
+                "raw_query": query,
+                "domain": domain,
+                "refine_count": 0,
+                "refinement_history": []
+            }
             logger.info(f"Query ingested. Domain: {domain}")
             return {
                 "messages": [AIMessage(content="Query ingested successfully")],
@@ -565,7 +601,8 @@
 
     def analyze_content(self, state: AgentState) -> Dict:
         """
-        Analyzes the retrieved documents.
+        Analyzes the retrieved documents. If a domain-specific fallback is available, it is used;
+        otherwise, the system synthesizes a comprehensive analysis via the cognitive processor.
         """
         try:
             domain = state["context"].get("domain", "Biomedical Research").strip().lower()
@@ -599,8 +636,12 @@
             analysis = state["messages"][-1].content
             validation_prompt = (
                 f"Validate the following research analysis:\n{analysis}\n\n"
-                "Check for:\
-                "
+                "Check for:\n"
+                "1. Technical accuracy\n"
+                "2. Citation support (are claims backed by evidence?)\n"
+                "3. Logical consistency\n"
+                "4. Methodological soundness\n\n"
+                "Respond with 'VALID: [brief justification]' or 'INVALID: [brief justification]'."
             )
             response = self.processor.process_query(validation_prompt)
             logger.info("Output validation completed.")
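The new prompt asks the model to answer with a 'VALID: …' or 'INVALID: …' prefix, so the caller can branch on the first token of the returned content. A small sketch of that check against the OpenAI-style payload handled elsewhere in the file (the sample reply is hypothetical):

    from typing import Dict

    def is_validated(response: Dict) -> bool:
        """Return True when the validation reply starts with the VALID prefix."""
        content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
        return content.strip().upper().startswith("VALID")

    # Hypothetical reply shaped like the payloads handled elsewhere in the file.
    sample = {"choices": [{"message": {"content": "VALID: claims are supported by the cited results."}}]}
    print(is_validated(sample))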
@@ -626,7 +667,7 @@
                 "You are given the following series of refinement outputs:\n" +
                 "\n---\n".join(refinement_history) +
                 "\n\nSynthesize the above into a final, concise, and high-quality technical analysis report. "
-                "Focus on the key findings and improvements made across the iterations. Do not introduce new ideas; just synthesize the improvements. Ensure the report is well-structured and easy to understand."
+                "Focus on the key findings and improvements made across the iterations. Do not introduce new ideas; just synthesize the improvements. Ensure the report is well-structured and easy to understand."
             )
             meta_response = self.processor.process_query(meta_prompt)
             logger.info("Meta-refinement completed.")
@@ -637,8 +678,11 @@
         else:
             refinement_prompt = (
                 f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
-                "First, critically evaluate the analysis and identify its weaknesses, such as inaccuracies, unsupported claims, or lack of clarity. Summarize these weaknesses in a short paragraph.\n\n"
-                "Then, improve the following aspects:\
+                "First, critically evaluate the analysis and identify its weaknesses, such as inaccuracies, unsupported claims, or lack of clarity. Summarize these weaknesses in a short paragraph.\n\n"
+                "Then, improve the following aspects:\n"
+                "1. Technical precision\n"
+                "2. Empirical grounding\n"
+                "3. Theoretical coherence\n\n"
                 "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant, addressing the weaknesses identified."
             )
             response = self.processor.process_query(refinement_prompt)
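The refinement branch and the meta-refinement branch together imply a loop: refine until a count threshold is reached, then synthesize the accumulated history into one report. A control-flow sketch under that reading, with the model call stubbed out (the threshold and the stub are assumptions, not values taken from the app):

    from typing import Dict, List

    MAX_REFINEMENTS = 3  # assumed threshold; the app's actual cut-off is not shown here

    def stub_process_query(prompt: str) -> str:
        """Stand-in for the cognitive processor's API call."""
        return f"refined({len(prompt)} chars)"

    def refine(analysis: str) -> str:
        context: Dict = {"refine_count": 0, "refinement_history": []}
        current = analysis
        while context["refine_count"] < MAX_REFINEMENTS:
            current = stub_process_query(f"Refine this analysis:\n{current}")
            context["refinement_history"].append(current)
            context["refine_count"] += 1
        # Meta-refinement: synthesize the accumulated outputs into one report.
        history: List[str] = context["refinement_history"]
        return stub_process_query("Synthesize:\n" + "\n---\n".join(history))

    print(refine("initial draft analysis"))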
@@ -755,7 +799,6 @@ class ResearchInterface:
             unsafe_allow_html=True
         )
 
-
     def _build_sidebar(self) -> None:
         with st.sidebar:
             st.title("🔍 Research Database")
@@ -858,15 +901,24 @@ Potential issues:
 # Multi-Modal Retriever Initialization
 # ------------------------------
 from transformers import CLIPProcessor, CLIPModel
+
+# Load CLIP components
 clip_model = CLIPModel.from_pretrained(ResearchConfig.CLIP_SETTINGS["model"])
 clip_processor = CLIPProcessor.from_pretrained(ResearchConfig.CLIP_SETTINGS["model"])
+
+# Update the ExtendedQuantumDocumentManager with the loaded CLIP components
+qdm.clip_model = clip_model
+qdm.clip_processor = clip_processor
+
 multi_retriever = MultiModalRetriever(retriever.research_retriever, clip_model, clip_processor)
 
 # ------------------------------
 # Execute the Application
 # ------------------------------
 class ResearchInterfaceExtended(ResearchInterface):
-    """
+    """
+    Extended interface that includes domain adaptability, collaboration features, and graph visualization.
+    """
     def _build_main_interface(self) -> None:
         super()._build_main_interface()
 