Update app.py

app.py (CHANGED)
@@ -1,6 +1,7 @@
 # ------------------------------
 # Enhanced NeuroResearch AI System with Domain Adaptability,
-# Refinement Counter, Dynamic Difficulty Gradient,
+# Refinement Counter, Dynamic Difficulty Gradient, Meta-Refinement Inspired by LADDER,
+# Quantum Knowledge Graph & Multi-Modal Enhancements
 # ------------------------------
 import logging
 import os
@@ -16,6 +17,8 @@ from typing import List, Dict, Any, Optional, Sequence
 import chromadb
 import requests
 import streamlit as st
+from PIL import Image
+import torch
 
 # LangChain and LangGraph imports
 from langchain_openai import OpenAIEmbeddings
@@ -28,7 +31,7 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from langchain.tools.retriever import create_retriever_tool
 
-# Increase Python's recursion limit
+# Increase Python's recursion limit (if needed)
 sys.setrecursionlimit(1000)
 
 # ------------------------------
@@ -66,7 +69,6 @@ class ResearchConfig:
        "Latest Trends in Machine Learning Methods Using Quantum Computing":
            "Quantum ML Frontiers"
    }
-    # Base analysis template remains unchanged.
    ANALYSIS_TEMPLATE = (
        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
        "Respond with:\n"
@@ -77,7 +79,6 @@ class ResearchConfig:
        "5. Limitations & Future Directions\n\n"
        "Format: Markdown with LaTeX mathematical notation where applicable"
    )
-    # Domain-specific prompt additions for customizable analysis
    DOMAIN_PROMPTS = {
        "Biomedical Research": "Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs.",
        "Legal Research": "Emphasize legal precedents, case law, and nuanced statutory interpretations.",
@@ -85,6 +86,14 @@ class ResearchConfig:
        "Competitive Programming and Theoretical Computer Science": "Focus on algorithmic complexity, innovative proofs, and computational techniques.",
        "Social Sciences": "Concentrate on economic trends, sociological data, and correlations impacting public policy."
    }
+    ENSEMBLE_MODELS = {
+        "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
+        "deepseek-coder": {"max_tokens": 2500, "temp": 0.5}
+    }
+    CLIP_SETTINGS = {
+        "model": "openai/clip-vit-large-patch14",
+        "image_db": "image_vectors"
+    }
 
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
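Note: the two dicts added above are read later in this commit as `ResearchConfig.ENSEMBLE_MODELS[model]["temp"]` and `ResearchConfig.CLIP_SETTINGS["model"]`, so they must land as class attributes of `ResearchConfig`, not module-level globals. A minimal sketch of the expected lookup, assuming that placement:

    settings = ResearchConfig.ENSEMBLE_MODELS["deepseek-chat"]
    temperature, max_tokens = settings["temp"], settings["max_tokens"]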
@@ -186,7 +195,6 @@ class ResearchRetriever:
        For now, domain differentiation is minimal; however, you can extend this method to use domain-specific collections.
        """
        try:
-            # For demonstration, we use the "research" collection for all domains.
            return self.research_retriever.invoke(query)
        except Exception as e:
            logger.exception(f"Retrieval error for domain '{domain}'.")
@@ -211,9 +219,8 @@ class CognitiveProcessor:
        Processes a query by sending multiple API requests in parallel.
        """
        futures = []
-        for _ in range(3):
+        for _ in range(3):
            futures.append(self.executor.submit(self._execute_api_request, prompt))
-
        results = []
        for future in as_completed(futures):
            try:
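Note: the loop above is the standard fan-out/fan-in idiom: submit N identical jobs, then harvest whichever complete, dropping failures. A self-contained sketch of the same pattern:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fan_out(task, n=3):
        # Submit n copies of task; collect results as they complete, skipping failures.
        with ThreadPoolExecutor(max_workers=n) as executor:
            futures = [executor.submit(task) for _ in range(n)]
            results = []
            for future in as_completed(futures):
                try:
                    results.append(future.result())
                except Exception:
                    pass  # a failed attempt is simply dropped
        return results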
@@ -221,7 +228,6 @@
            except Exception as e:
                logger.exception("Error during API request execution.")
                st.error(f"Processing Error: {str(e)}")
-
        return self._consensus_check(results)

    def _execute_api_request(self, prompt: str) -> Dict:
@@ -268,26 +274,161 @@
        return max(valid_results, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))

 # ------------------------------
-# Research Workflow
+# Enhanced Cognitive Processor with Ensemble & Knowledge Graph Integration
+# ------------------------------
+class EnhancedCognitiveProcessor(CognitiveProcessor):
+    """
+    Extended with ensemble processing and knowledge graph integration.
+    """
+    def __init__(self) -> None:
+        super().__init__()
+        self.knowledge_graph = QuantumKnowledgeGraph()
+        self.ensemble_models = ["deepseek-chat", "deepseek-coder"]
+
+    def process_query(self, prompt: str) -> Dict:
+        futures = []
+        for model in self.ensemble_models:
+            futures.append(self.executor.submit(self._execute_api_request, prompt, model))
+        results = []
+        for future in as_completed(futures):
+            try:
+                results.append(future.result())
+            except Exception as e:
+                logger.error(f"Model processing error: {str(e)}")
+        best_response = self._consensus_check(results)
+        self._update_knowledge_graph(best_response)
+        return best_response
+
+    def _execute_api_request(self, prompt: str, model: str) -> Dict:
+        headers = {
+            "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
+            "Content-Type": "application/json",
+            "X-Research-Session": self.session_id
+        }
+        payload = {
+            "model": model,
+            "messages": [{
+                "role": "user",
+                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+            }],
+            "temperature": ResearchConfig.ENSEMBLE_MODELS[model]["temp"],
+            "max_tokens": ResearchConfig.ENSEMBLE_MODELS[model]["max_tokens"],
+            "top_p": 0.9
+        }
+        try:
+            response = requests.post(
+                "https://api.deepseek.com/v1/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=45
+            )
+            response.raise_for_status()
+            logger.info(f"API request successful for model {model}.")
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            logger.exception(f"API request failed for model {model}.")
+            return {"error": str(e)}
+
+    def _update_knowledge_graph(self, response: Dict):
+        content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
+        node_id = self.knowledge_graph.create_node({"content": content}, "analysis")
+        if self.knowledge_graph.node_counter > 1:
+            self.knowledge_graph.create_relation(node_id - 1, node_id, "evolution", strength=0.8)
+
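Note: the override `_execute_api_request(self, prompt, model)` no longer matches the base class signature `_execute_api_request(self, prompt)`, so any inherited code path that still calls it with one argument will raise a TypeError. A minimal sketch of the concern and one possible fix (the default value is an assumption, not part of this commit):

    from typing import Dict

    class Base:
        def _execute_api_request(self, prompt: str) -> Dict:
            return {"model": "default", "prompt": prompt}

    class Ensemble(Base):
        # A default model keeps one-argument calls from Base-era code working.
        def _execute_api_request(self, prompt: str, model: str = "deepseek-chat") -> Dict:
            return {"model": model, "prompt": prompt}

    Ensemble()._execute_api_request("q")  # OK; a TypeError without the default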
+# ------------------------------
+# Quantum Knowledge Graph & Multi-Modal Enhancements
+# ------------------------------
+from graphviz import Digraph
+
+class QuantumKnowledgeGraph:
+    """Dynamic knowledge representation system with multi-modal nodes."""
+    def __init__(self):
+        self.nodes = {}
+        self.relations = []
+        self.node_counter = 0
+
+    def create_node(self, content: Dict, node_type: str) -> int:
+        self.node_counter += 1
+        self.nodes[self.node_counter] = {
+            "id": self.node_counter,
+            "content": content,
+            "type": node_type,
+            "connections": []
+        }
+        return self.node_counter
+
+    def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0):
+        self.relations.append({
+            "source": source,
+            "target": target,
+            "type": rel_type,
+            "strength": strength
+        })
+        self.nodes[source]["connections"].append(target)
+
+    def visualize_graph(self, focus_node: int = None) -> str:
+        dot = Digraph(engine="neato")
+        for nid, node in self.nodes.items():
+            label = f"{node['type']}\n{self._truncate_content(node['content'])}"
+            dot.node(str(nid), label)
+        for rel in self.relations:
+            dot.edge(str(rel["source"]), str(rel["target"]), label=rel["type"])
+        if focus_node:
+            dot.node(str(focus_node), color="red", style="filled")
+        return dot.source
+
+    def _truncate_content(self, content: Dict) -> str:
+        return json.dumps(content)[:50] + "..."
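Note: `_truncate_content` calls `json.dumps`, so `import json` must exist at the top of the file (the import hunks in this diff do not add it, and whether the unchanged header already has it is not visible here). `EnhancedCognitiveProcessor.__init__` also references `QuantumKnowledgeGraph` before this definition, which only works because the name is resolved at call time, after the module finishes loading. A minimal usage sketch, assuming `import json` is present:

    kg = QuantumKnowledgeGraph()
    a = kg.create_node({"content": "first analysis"}, "analysis")
    b = kg.create_node({"content": "refined analysis"}, "analysis")
    kg.create_relation(a, b, "evolution", strength=0.8)
    dot_source = kg.visualize_graph(focus_node=b)  # DOT text for st.graphviz_chart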
+
+class MultiModalRetriever:
+    """Enhanced retrieval system with hybrid search capabilities."""
+    def __init__(self, text_retriever, clip_model, clip_processor):
+        self.text_retriever = text_retriever
+        self.clip_model = clip_model
+        self.clip_processor = clip_processor
+        self.code_retriever = create_retriever_tool([])  # BM25 retriever placeholder
+
+    def retrieve(self, query: str, domain: str) -> Dict[str, List]:
+        results = {
+            "text": self._retrieve_text(query),
+            "images": self._retrieve_images(query),
+            "code": self._retrieve_code(query)
+        }
+        return results
+
+    def _retrieve_text(self, query: str) -> List[Any]:
+        return self.text_retriever.invoke(query)
+
+    def _retrieve_images(self, query: str) -> List[str]:
+        inputs = self.clip_processor(text=query, return_tensors="pt")
+        with torch.no_grad():
+            text_emb = self.clip_model.get_text_features(**inputs)
+        return ["image_result_1.png", "image_result_2.png"]
+
+    def _retrieve_code(self, query: str) -> List[str]:
+        return self.code_retriever.invoke(query)
+
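Note: `create_retriever_tool([])` does not match LangChain's signature (`create_retriever_tool(retriever, name, description)`, which returns a Tool rather than a retriever), and `_retrieve_images` computes `text_emb` but returns hard-coded file names, so both are placeholders. A hedged sketch of a working BM25 stand-in, assuming `langchain_community` and `rank_bm25` are installed:

    from langchain_community.retrievers import BM25Retriever

    # A real keyword retriever over a seed corpus, exposing the same .invoke(query) API.
    code_retriever = BM25Retriever.from_texts(
        ["def quicksort(arr): ...", "class Graph: ..."]
    )
    code_retriever.invoke("sorting algorithm")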
+# ------------------------------
+# Enhanced Research Workflow
 # ------------------------------
 class ResearchWorkflow:
     """
     Defines the multi-step research workflow using a state graph.
     """
     def __init__(self) -> None:
-        self.processor = CognitiveProcessor()
+        self.processor = EnhancedCognitiveProcessor()
         self.workflow = StateGraph(AgentState)
         self._build_workflow()
         self.app = self.workflow.compile()

     def _build_workflow(self) -> None:
-        # Define nodes
+        # Define nodes from base workflow
         self.workflow.add_node("ingest", self.ingest_query)
         self.workflow.add_node("retrieve", self.retrieve_documents)
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
-        # Set
+        # Set base edges
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
@@ -298,16 +439,18 @@ class ResearchWorkflow:
         )
         self.workflow.add_edge("validate", END)
         self.workflow.add_edge("refine", "retrieve")
+        # Extended node for multi-modal enhancement
+        self.workflow.add_node("enhance", self.enhance_analysis)
+        self.workflow.add_edge("validate", "enhance")
+        self.workflow.add_edge("enhance", END)

     def ingest_query(self, state: AgentState) -> Dict:
         """
-        Ingests the research query and initializes the
+        Ingests the research query and initializes the context with query, domain, refinement counter, and history.
         """
         try:
             query = state["messages"][-1].content
-            # Retrieve domain from the query context if available; otherwise, default to "Biomedical Research"
             domain = state.get("domain", "Biomedical Research")
-            # Initialize context with raw query, selected domain, refinement counter, and empty refinement history
             new_context = {"raw_query": query, "domain": domain, "refine_count": 0, "refinement_history": []}
             logger.info(f"Query ingested. Domain: {domain}")
             return {
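Note: after this hunk, "validate" keeps its old unconditional edge to END while also gaining an unconditional edge to "enhance", on top of the conditional edges added just above; presumably the direct `add_edge("validate", END)` should be removed once "enhance" terminates the graph. Also, `self.enhance_analysis` is registered as a node, but no `enhance_analysis` method is defined anywhere in this diff. A hypothetical sketch of such a node, assuming `AIMessage` is importable from `langchain_core.messages` alongside `HumanMessage`:

    def enhance_analysis(self, state: AgentState) -> Dict:
        # Hypothetical node: append multi-modal context to the validated report.
        analysis = state["messages"][-1].content
        enhanced = f"{analysis}\n\n## Multi-Modal Context\n(image and code retrieval results attached here)"
        return {"messages": [AIMessage(content=enhanced)], "context": state["context"]}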
@@ -325,7 +468,6 @@ class ResearchWorkflow:
         """
         try:
             query = state["context"]["raw_query"]
-            # For demonstration, we use the "research" collection for all domains.
             docs = retriever.retrieve(query, state["context"].get("domain", "Biomedical Research"))
             logger.info(f"Retrieved {len(docs)} documents for query.")
             return {
@@ -352,7 +494,6 @@ class ResearchWorkflow:
             docs_text = "\n\n".join([d.page_content for d in docs])
             domain = state["context"].get("domain", "Biomedical Research")
             domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
-            # Combine domain-specific instructions with the base analysis template
             full_prompt = f"{domain_prompt}\n\n" + ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
             response = self.processor.process_query(full_prompt)
             if "error" in response:
@@ -395,22 +536,19 @@ class ResearchWorkflow:
     def refine_results(self, state: AgentState) -> Dict:
         """
         Refines the analysis report if validation fails.
-        Implements
+        Implements a meta-refinement mechanism inspired by LADDER.
         Tracks refinement history, uses a dynamic difficulty gradient, and if the refinement count exceeds a threshold,
         summarizes the history into a final output.
         """
         try:
             current_count = state["context"].get("refine_count", 0)
             state["context"]["refine_count"] = current_count + 1
-            # Append current analysis to refinement history
             refinement_history = state["context"].setdefault("refinement_history", [])
             current_analysis = state["messages"][-1].content
             refinement_history.append(current_analysis)
-            # Compute a "difficulty level" (from 3 to 0) based on refinement count
             difficulty_level = max(0, 3 - state["context"]["refine_count"])
             logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")

-            # If refinement count exceeds threshold, perform meta-refinement by summarizing the history
             if state["context"]["refine_count"] >= 3:
                 meta_prompt = (
                     "You are given the following series of refinement outputs:\n" +
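Note: the "difficulty gradient" is simply `max(0, 3 - refine_count)`: iteration 1 refines at level 2, iteration 2 at level 1, and from iteration 3 onward the level floors at 0, which is exactly when the `refine_count >= 3` branch switches to meta-refinement over the accumulated history. A quick check of the mapping:

    for refine_count in range(1, 5):
        print(refine_count, max(0, 3 - refine_count))  # -> 1 2, 2 1, 3 0, 4 0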
@@ -424,7 +562,6 @@ class ResearchWorkflow:
                     "context": state["context"]
                 }
             else:
-                # Standard refinement with a dynamic difficulty prompt
                 refinement_prompt = (
                     f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
                     "Improve the following aspects:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence\n\n"
@@ -466,11 +603,12 @@ class ResearchWorkflow:
         }

 # ------------------------------
-# Research Interface
+# Enhanced Research Interface
 # ------------------------------
 class ResearchInterface:
     """
     Provides the Streamlit-based interface for executing the research workflow.
+    Extended with collaboration features and knowledge visualization.
     """
     def __init__(self) -> None:
         self.workflow = ResearchWorkflow()
@@ -540,6 +678,14 @@ class ResearchInterface:
         st.subheader("Analysis Metrics")
         st.metric("Vector Collections", 2)
         st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
+        with st.sidebar.expander("Collaboration Hub"):
+            st.subheader("Live Research Team")
+            st.write("👩‍💻 Researcher A")
+            st.write("👨‍🔬 Researcher B")
+            st.write("🤖 AI Assistant")
+            st.subheader("Knowledge Graph")
+            if st.button("🕸 View Current Graph"):
+                self._display_knowledge_graph()

     def _build_main_interface(self) -> None:
         st.title("🧠 NeuroResearch AI")
@@ -548,7 +694,6 @@ class ResearchInterface:
             height=200,
             placeholder="Enter technical research question..."
         )
-        # Domain selection for research use cases
         domain = st.selectbox(
             "Select Research Domain:",
             options=[
@@ -566,7 +711,6 @@ class ResearchInterface:
     def _execute_analysis(self, query: str, domain: str) -> None:
         try:
             with st.spinner("Initializing Quantum Analysis..."):
-                # Pass domain into the context by adding it to the initial state
                 results = self.workflow.app.stream({
                     "messages": [HumanMessage(content=query)],
                     "context": {"domain": domain},
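Note: `_execute_analysis` passes the selected domain nested under "context", but `ingest_query` reads it from the top level (`state.get("domain", ...)`) before rebuilding the context, so the sidebar selection likely never takes effect and every run falls back to "Biomedical Research". A one-line fix in `ingest_query`, assuming the initial state keeps the shape shown here:

    domain = state.get("context", {}).get("domain", "Biomedical Research")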
@@ -614,6 +758,56 @@ Potential issues:
                 st.warning("⚠️ Validation Issues Detected")
                 with st.expander("View Validation Details", expanded=True):
                     st.markdown(content)
+            elif 'enhance' in event:
+                with st.container():
+                    content = event['enhance']['messages'][0].content
+                    with st.expander("Enhanced Multi-Modal Analysis Report", expanded=True):
+                        st.markdown(content)
+
+    def _display_knowledge_graph(self) -> None:
+        graph = self.workflow.processor.knowledge_graph.visualize_graph()
+        st.graphviz_chart(graph)
+
+# ------------------------------
+# Multi-Modal Retriever Initialization
+# ------------------------------
+from transformers import CLIPProcessor, CLIPModel
+clip_model = CLIPModel.from_pretrained(ResearchConfig.CLIP_SETTINGS["model"])
+clip_processor = CLIPProcessor.from_pretrained(ResearchConfig.CLIP_SETTINGS["model"])
+multi_retriever = MultiModalRetriever(retriever.research_retriever, clip_model, clip_processor)
+
+# ------------------------------
+# Updated Document Processing for Multi-Modal Documents
+# ------------------------------
+class QuantumDocumentManager(QuantumDocumentManager):
+    """Extended with multi-modal document handling."""
+    def create_image_collection(self, image_paths: List[str]):
+        embeddings = []
+        for img_path in image_paths:
+            image = Image.open(img_path)
+            inputs = clip_processor(images=image, return_tensors="pt")
+            with torch.no_grad():
+                emb = clip_model.get_image_features(**inputs)
+            embeddings.append(emb.numpy())
+        return Chroma.from_embeddings(
+            embeddings=embeddings,
+            documents=image_paths,
+            collection_name="neuro_images"
+        )
+
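Note: LangChain's Chroma wrapper exposes `from_texts`/`from_documents`; the `Chroma.from_embeddings(...)` call above matches no Chroma constructor we know of (FAISS has a `from_embeddings`, with a different argument shape). Loading CLIP at module level also reruns on every Streamlit script execution unless wrapped in `st.cache_resource`. A hedged sketch of storing the image vectors with the `chromadb` client the file already imports:

    client = chromadb.Client()
    collection = client.create_collection("neuro_images")
    collection.add(
        ids=image_paths,
        embeddings=[e.squeeze(0).tolist() for e in embeddings],
        documents=image_paths,
    )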
+# Initialize image collection
+qdm.create_image_collection([
+    "data/images/quantum_computing.png",
+    "data/images/neural_arch.png"
+])
+
+# ------------------------------
+# Execute the Application
+# ------------------------------
+class ResearchInterface(ResearchInterface):
+    """Extended with domain adaptability, collaboration, and graph visualization."""
+    def _build_main_interface(self) -> None:
+        super()._build_main_interface()

 if __name__ == "__main__":
     ResearchInterface()
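Note: the module-level `qdm.create_image_collection([...])` call assumes a `qdm` instance created in an unchanged part of the file and that both PNG files exist on disk, and `class ResearchInterface(ResearchInterface)` inherits from itself (legal, since the name is rebound, but the subclass adds nothing beyond calling `super()`). A hypothetical guard for the missing-file case:

    import os
    demo_images = ["data/images/quantum_computing.png", "data/images/neural_arch.png"]
    existing = [p for p in demo_images if os.path.exists(p)]
    if existing:
        qdm.create_image_collection(existing)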