# Source: Hugging Face file view of app.py by mgbam -- commit b294f9c ("Update app.py", verified), 38.5 kB.
# ------------------------------
# Enhanced NeuroResearch AI System with Domain Adaptability,
# Refinement Counter, Dynamic Difficulty Gradient, Meta-Refinement Inspired by LADDER,
# Quantum Knowledge Graph & Multi-Modal Enhancements
# ------------------------------
import logging
import os
import re
import hashlib
import json
import time
import sys
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Any, Optional, Sequence
import chromadb
import requests
import streamlit as st
from PIL import Image
import torch
# LangChain and LangGraph imports
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langgraph.graph import END, StateGraph
from langgraph.prebuilt import ToolNode
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict, Annotated
from langchain.tools.retriever import create_retriever_tool
# Set Python's recursion limit explicitly.
# NOTE(review): 1000 is normally CPython's default, so this call does not
# actually raise the limit -- confirm whether a larger value was intended.
sys.setrecursionlimit(1000)
# ------------------------------
# Logging Configuration
# ------------------------------
# Root logging: INFO and above, each record rendered as "<time> [<LEVEL>] <message>".
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Module-level logger used by every component in this file.
logger = logging.getLogger(__name__)
# ------------------------------
# State Schema Definition
# ------------------------------
class AgentState(TypedDict):
    """State dictionary handed from node to node in the research workflow graph."""
    # Message history; the annotation attaches LangGraph's `add_messages` to this field.
    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
    # Working data written by the workflow nodes (e.g. raw query, domain, documents, refine counter).
    context: Dict[str, Any]
    # Auxiliary run information (the nodes in this file store a timestamp or an error status here).
    metadata: Dict[str, Any]
# ------------------------------
# Configuration
# ------------------------------
class ResearchConfig:
    """Static settings shared by the document, retrieval, analysis, and UI layers."""

    # --- Credentials and vector-store settings --------------------------------
    # API key for the DeepSeek backend; None when the environment variable is unset.
    DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
    # Directory handed to the persistent Chroma client.
    CHROMA_PATH = "chroma_db"
    # Text-splitter chunk size and overlap.
    CHUNK_SIZE = 512
    CHUNK_OVERLAP = 64
    # Worker count for the thread pool that issues backend requests.
    MAX_CONCURRENT_REQUESTS = 5
    # Dimensionality requested from the embedding model.
    EMBEDDING_DIMENSIONS = 1536

    # --- Sidebar catalogue: full document title -> short display name ----------
    DOCUMENT_MAP = {
        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
            "CV-Transformer Hybrid Architecture",
        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
            "Transformer Architecture Analysis",
        "Latest Trends in Machine Learning Methods Using Quantum Computing":
            "Quantum ML Frontiers",
    }

    # --- Prompt template for backend analysis; `{context}` receives the document text
    ANALYSIS_TEMPLATE = (
        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
        "Respond with:\n"
        "1. Key Technical Contributions (bullet points)\n"
        "2. Novel Methodologies\n"
        "3. Empirical Results (with metrics)\n"
        "4. Potential Applications\n"
        "5. Limitations & Future Directions\n\n"
        "Format: Markdown with LaTeX mathematical notation where applicable"
    )

    # --- Canned analyses, keyed by lowercase domain name ------------------------
    DOMAIN_FALLBACKS = {
        "biomedical research": """
# Biomedical Research Analysis
## Key Contributions
- Integration of clinical trial design with digital biomarkers.
- Multi-omics data used for precise patient stratification.
## Methodologies
- Machine learning for precision medicine.
- Federated learning for multi-center trials.
## Empirical Results
- Significant improvements in patient outcomes.
## Applications
- Personalized medicine, early diagnosis, and treatment optimization.
""",
        "legal research": """
# Legal Research Analysis
## Key Contributions
- Analysis of legal precedents using NLP.
- Advanced case law retrieval and summarization.
## Methodologies
- Automated legal reasoning with transformer models.
- Sentiment analysis on judicial opinions.
## Empirical Results
- Improved accuracy in predicting case outcomes.
## Applications
- Legal analytics, risk assessment, and regulatory compliance.
""",
        "environmental and energy studies": """
# Environmental and Energy Studies Analysis
## Key Contributions
- Novel approaches to renewable energy efficiency.
- Integration of policy analysis with technical metrics.
## Methodologies
- Simulation models for climate impact.
- Data fusion from sensor networks and satellite imagery.
## Empirical Results
- Enhanced performance in energy forecasting.
## Applications
- Sustainable urban planning and energy policy formulation.
""",
        "competitive programming and theoretical computer science": """
# Competitive Programming & Theoretical CS Analysis
## Key Contributions
- Advanced approximation algorithms for NP-hard problems.
- Use of parameterized complexity and fixed-parameter tractability.
## Methodologies
- Branch-and-bound with dynamic programming.
- Quantum-inspired algorithms for optimization.
## Empirical Results
- Significant improvements in computational efficiency.
## Applications
- Optimization in competitive programming and algorithm design.
""",
        "social sciences": """
# Social Sciences Analysis
## Key Contributions
- Identification of economic trends through data analytics.
- Integration of sociological data with computational models.
## Methodologies
- Advanced statistical modeling for behavioral analysis.
- Machine learning for trend forecasting.
## Empirical Results
- High correlation with traditional survey methods.
## Applications
- Policy design, urban studies, and social impact analysis.
""",
    }

    # --- One-sentence steering hints prepended to backend prompts, same keys ----
    DOMAIN_PROMPTS = {
        "biomedical research": "Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs.",
        "legal research": "Emphasize legal precedents, case law, and nuanced statutory interpretations.",
        "environmental and energy studies": "Highlight renewable energy technologies, efficiency metrics, and policy implications.",
        "competitive programming and theoretical computer science": "Focus on algorithmic complexity, innovative proofs, and computational techniques.",
        "social sciences": "Concentrate on economic trends, sociological data, and correlations impacting public policy.",
    }

    # --- Per-model sampling settings for the ensemble processor -----------------
    ENSEMBLE_MODELS = {
        "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
        "deepseek-coder": {"max_tokens": 2500, "temp": 0.5},
    }

    # --- CLIP checkpoint name and image collection name -------------------------
    CLIP_SETTINGS = {
        "model": "openai/clip-vit-large-patch14",
        "image_db": "image_vectors",
    }
# Without a DeepSeek key nothing below can work: show setup instructions and halt the script.
if not ResearchConfig.DEEPSEEK_API_KEY:
    st.error(
        "**Research Portal Configuration Required**\n"
        "1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)\n"
        "2. Configure secret: `DEEPSEEK_API_KEY` in Space settings\n"
        "3. Rebuild deployment"
    )
    st.stop()
# ------------------------------
# Quantum Document Processing
# ------------------------------
class QuantumDocumentManager:
    """
    Turns raw document strings into Chroma collections.

    Holds one Chroma client (persistent when possible, in-memory otherwise) and one
    OpenAI embedding function that every created collection shares.
    """

    def __init__(self) -> None:
        """Open the Chroma client and prepare the embedding function."""
        try:
            self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
            logger.info("Initialized PersistentClient for Chroma.")
        except Exception:
            # Any failure opening the on-disk store degrades to an in-memory client.
            logger.exception("Error initializing PersistentClient; falling back to in-memory client.")
            self.client = chromadb.Client()

        self.embeddings = OpenAIEmbeddings(
            model="text-embedding-3-large",
            dimensions=ResearchConfig.EMBEDDING_DIMENSIONS,
        )

    def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
        """
        Split `documents` into chunks and store them in the collection `collection_name`.

        Returns the Chroma vector store built from the chunks. Errors raised while
        splitting are logged and re-raised.
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=ResearchConfig.CHUNK_SIZE,
            chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
            separators=["\n\n", "\n", "|||"],
        )
        try:
            chunks = text_splitter.create_documents(documents)
            logger.info(f"Created {len(chunks)} document chunks for collection '{collection_name}'.")
        except Exception:
            logger.exception("Error during document splitting.")
            raise

        chunk_ids = [self._document_id(chunk.page_content) for chunk in chunks]
        return Chroma.from_documents(
            documents=chunks,
            embedding=self.embeddings,
            client=self.client,
            collection_name=collection_name,
            ids=chunk_ids,
        )

    def _document_id(self, content: str) -> str:
        """Return an id made of the first 16 hex chars of the content's SHA-256 plus the current Unix second."""
        # NOTE(review): the timestamp suffix means the same content gets a new id on
        # every run -- confirm that re-inserting into the persistent store is intended.
        digest = hashlib.sha256(content.encode()).hexdigest()[:16]
        stamp = int(time.time())
        return f"{digest}-{stamp}"
# ------------------------------
# Extended Quantum Document Manager for Multi-Modal Documents
# ------------------------------
class ExtendedQuantumDocumentManager(QuantumDocumentManager):
    """Extended with multi-modal document handling."""

    def create_image_collection(self, image_paths: List[str]):
        """
        Embed each readable image with CLIP and build the "neuro_images" collection.

        Args:
            image_paths: Paths of the image files to embed.

        Returns:
            The collection built from the images that could be processed, or None
            when no image could be embedded. Unreadable or missing files are
            logged and skipped.
        """
        embeddings = []
        valid_images = []
        for img_path in image_paths:
            try:
                # Context manager closes the underlying file handle once the
                # features are extracted (the original left it open).
                with Image.open(img_path) as image:
                    inputs = clip_processor(images=image, return_tensors="pt")
                with torch.no_grad():
                    emb = clip_model.get_image_features(**inputs)
                embeddings.append(emb.numpy())
                valid_images.append(img_path)
            except FileNotFoundError:
                logger.warning(f"Image file not found: {img_path}. Skipping.")
            except Exception as e:
                logger.exception(f"Error processing image {img_path}: {e}")

        if not embeddings:
            logger.error("No valid images found for image collection.")
            return None

        # NOTE(review): verify that the installed LangChain `Chroma` class really
        # exposes `from_embeddings`; if it does not, this call raises AttributeError.
        return Chroma.from_embeddings(
            embeddings=embeddings,
            documents=valid_images,
            collection_name="neuro_images"
        )
# Initialize document collections
# One shared manager builds both text collections when the script starts.
qdm = ExtendedQuantumDocumentManager()

# "research": the three paper titles that the sidebar also lists.
research_docs = qdm.create_collection(
    documents=[
        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
        "Latest Trends in Machine Learning Methods Using Quantum Computing",
    ],
    collection_name="research",
)

# "development": short project status notes.
development_docs = qdm.create_collection(
    documents=[
        "Project A: UI Design Completed, API Integration in Progress",
        "Project B: Testing New Feature X, Bug Fixes Needed",
        "Product Y: In the Performance Optimization Stage Before Release",
    ],
    collection_name="development",
)
# ------------------------------
# Advanced Retrieval System
# ------------------------------
class ResearchRetriever:
    """
    Wraps the retrievers built on the module-level document collections.

    A max-marginal-relevance retriever is created over `research_docs` and a plain
    similarity retriever over `development_docs`.
    """

    def __init__(self) -> None:
        """Create both retrievers; any failure is logged and re-raised."""
        mmr_options = {'k': 4, 'fetch_k': 20, 'lambda_mult': 0.85}
        similarity_options = {'k': 3}
        try:
            self.research_retriever = research_docs.as_retriever(
                search_type="mmr",
                search_kwargs=mmr_options,
            )
            self.development_retriever = development_docs.as_retriever(
                search_type="similarity",
                search_kwargs=similarity_options,
            )
            logger.info("Initialized retrievers for research and development domains.")
        except Exception:
            logger.exception("Error initializing retrievers.")
            raise

    def retrieve(self, query: str, domain: str) -> List[Any]:
        """
        Return the documents matching `query`, or an empty list when retrieval fails.

        `domain` is only used in the error log; every query goes to the research retriever.
        """
        try:
            hits = self.research_retriever.invoke(query)
        except Exception:
            logger.exception(f"Retrieval error for domain '{domain}'.")
            return []
        return hits
# Module-level retriever instance; ResearchWorkflow.retrieve_documents calls it.
retriever = ResearchRetriever()
# ------------------------------
# Cognitive Processing Unit
# ------------------------------
class CognitiveProcessor:
    """
    Sends each prompt to the backend three times in parallel and keeps one answer.

    The "consensus" step is a simple heuristic: among the responses that did not
    fail, the one whose first choice has the longest message content wins.
    """

    def __init__(self) -> None:
        """Create the worker pool and a short session id derived from the current time."""
        self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
        self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]

    def process_query(self, prompt: str) -> Dict:
        """
        Run `prompt` through three concurrent backend requests.

        Returns:
            The selected backend response, or ``{"error": ...}`` when every
            request failed.
        """
        futures = [
            self.executor.submit(self._execute_api_request, prompt)
            for _ in range(3)
        ]
        results = []
        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as e:
                logger.exception("Error during API request execution.")
                st.error(f"Processing Error: {str(e)}")
        return self._consensus_check(results)

    def _execute_api_request(self, prompt: str) -> Dict:
        """
        POST one chat-completion request for `prompt`.

        Returns:
            The decoded JSON response, or ``{"error": <message>}`` when the
            request raises a ``requests`` exception.
        """
        headers = {
            "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
            "X-Research-Session": self.session_id
        }
        payload = {
            "model": "deepseek-chat",
            "messages": [{
                "role": "user",
                "content": f"Respond as Senior AI Researcher:\n{prompt}"
            }],
            "temperature": 0.7,
            "max_tokens": 1500,
            "top_p": 0.9
        }
        try:
            response = requests.post(
                "https://api.deepseek.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=45
            )
            response.raise_for_status()
            logger.info("Backend API request successful.")
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.exception("Backend API request failed.")
            return {"error": str(e)}

    @staticmethod
    def _response_text(response: Dict) -> str:
        """Return the first choice's message content, or "" when it is missing or malformed."""
        # `or [{}]` also covers an explicit empty list, which the original
        # `.get('choices', [{}])[0]` turned into an IndexError.
        choices = response.get("choices") or [{}]
        first = choices[0] if isinstance(choices[0], dict) else {}
        message = first.get("message")
        if not isinstance(message, dict):
            return ""
        content = message.get("content")
        return content if isinstance(content, str) else ""

    def _consensus_check(self, results: List[Dict]) -> Dict:
        """
        Pick the response with the longest content among those without an "error" key.

        Returns ``{"error": "All API requests failed"}`` when no usable response exists.
        """
        valid_results = [r for r in results if "error" not in r]
        if not valid_results:
            logger.error("All API requests failed.")
            return {"error": "All API requests failed"}
        return max(valid_results, key=lambda r: len(self._response_text(r)))
# ------------------------------
# Enhanced Cognitive Processor with Ensemble & Knowledge Graph Integration
# ------------------------------
class EnhancedCognitiveProcessor(CognitiveProcessor):
    """
    Extended with ensemble processing and knowledge graph integration.

    Each prompt is sent once to every model in `ensemble_models`; the response
    chosen by the inherited `_consensus_check` is recorded in a knowledge graph.
    """

    def __init__(self) -> None:
        """Set up the base processor, an empty knowledge graph, and the model list."""
        super().__init__()
        self.knowledge_graph = QuantumKnowledgeGraph()
        self.ensemble_models = ["deepseek-chat", "deepseek-coder"]

    def process_query(self, prompt: str) -> Dict:
        """
        Query every ensemble model concurrently and return the selected response.

        Returns:
            The chosen backend response, or an ``{"error": ...}`` dict when no
            model produced a usable answer. Only successful responses are added
            to the knowledge graph.
        """
        futures = [
            self.executor.submit(self._execute_api_request, prompt, model)
            for model in self.ensemble_models
        ]
        results = []
        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as e:
                logger.error(f"Model processing error: {str(e)}")
        best_response = self._consensus_check(results)
        self._update_knowledge_graph(best_response)
        return best_response

    def _execute_api_request(self, prompt: str, model: str = "deepseek-chat") -> Dict:
        """
        POST one chat-completion request for `prompt` to `model`.

        `model` defaults to "deepseek-chat" so the override stays callable with
        the base-class signature. Sampling settings come from
        ``ResearchConfig.ENSEMBLE_MODELS[model]``.

        Returns:
            The decoded JSON response, or ``{"error": <message>}`` when the
            request raises a ``requests`` exception.
        """
        settings = ResearchConfig.ENSEMBLE_MODELS[model]
        headers = {
            "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
            "X-Research-Session": self.session_id
        }
        payload = {
            "model": model,
            "messages": [{
                "role": "user",
                "content": f"Respond as Senior AI Researcher:\n{prompt}"
            }],
            "temperature": settings["temp"],
            "max_tokens": settings["max_tokens"],
            "top_p": 0.9
        }
        try:
            response = requests.post(
                "https://api.deepseek.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=45
            )
            response.raise_for_status()
            logger.info(f"API request successful for model {model}.")
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.exception(f"API request failed for model {model}.")
            return {"error": str(e)}

    def _update_knowledge_graph(self, response: Dict) -> None:
        """Add the response content as an "analysis" node linked to the previous node."""
        # Failed requests carry no content; recording them would add empty nodes.
        if "error" in response:
            return
        # `or [{}]` guards against an explicit empty `choices` list.
        choices = response.get('choices') or [{}]
        content = (choices[0].get('message') or {}).get('content', '')
        node_id = self.knowledge_graph.create_node({"content": content}, "analysis")
        if self.knowledge_graph.node_counter > 1:
            self.knowledge_graph.create_relation(node_id - 1, node_id, "evolution", strength=0.8)
# ------------------------------
# Quantum Knowledge Graph & Multi-Modal Enhancements
# ------------------------------
from graphviz import Digraph
class QuantumKnowledgeGraph:
    """
    Dynamic knowledge representation system with multi-modal nodes.

    Nodes live in `nodes` keyed by an integer id that starts at 1 and grows by
    one per node; directed relations are kept in `relations`.
    """

    def __init__(self):
        """Start with an empty graph."""
        self.nodes = {}
        self.relations = []
        self.node_counter = 0

    def create_node(self, content: Dict, node_type: str) -> int:
        """Store a new node holding `content` and return its id."""
        self.node_counter += 1
        self.nodes[self.node_counter] = {
            "id": self.node_counter,
            "content": content,
            "type": node_type,
            "connections": []
        }
        return self.node_counter

    def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0):
        """
        Record a directed relation from `source` to `target`.

        Raises:
            KeyError: if either id is not a node of this graph. The check runs
                before anything is stored, so a failed call leaves the graph
                unchanged (the original appended the relation first).
        """
        for node_id in (source, target):
            if node_id not in self.nodes:
                raise KeyError(node_id)
        self.relations.append({
            "source": source,
            "target": target,
            "type": rel_type,
            "strength": strength
        })
        self.nodes[source]["connections"].append(target)

    def visualize_graph(self, focus_node: Optional[int] = None) -> str:
        """
        Return the graph as Graphviz DOT source.

        `focus_node`, when it names an existing node, is drawn filled in red.
        """
        dot = Digraph(engine="neato")
        for nid, node in self.nodes.items():
            label = f"{node['type']}\n{self._truncate_content(node['content'])}"
            dot.node(str(nid), label)
        for rel in self.relations:
            dot.edge(str(rel["source"]), str(rel["target"]), label=rel["type"])
        # Only highlight ids that exist; an unknown id would otherwise add a stray node.
        if focus_node in self.nodes:
            dot.node(str(focus_node), color="red", style="filled")
        return dot.source

    def _truncate_content(self, content: Dict, limit: int = 50) -> str:
        """Return `content` as JSON, cut to `limit` characters plus "..." only when it was cut."""
        text = json.dumps(content)
        if len(text) <= limit:
            return text
        return text[:limit] + "..."
class MultiModalRetriever:
    """Bundles text, image, and code lookups behind a single `retrieve` call."""

    def __init__(self, text_retriever, clip_model, clip_processor):
        """Keep the supplied text retriever and CLIP pair, and create the code retriever tool."""
        self.text_retriever = text_retriever
        self.clip_model = clip_model
        self.clip_processor = clip_processor
        # NOTE(review): an empty list is passed where a retriever object is
        # presumably expected -- confirm this tool can actually be invoked.
        self.code_retriever = create_retriever_tool([], "Code Retriever", "Retriever for code snippets")

    def retrieve(self, query: str, domain: str) -> Dict[str, List]:
        """Return the "text", "images", and "code" results for `query`; `domain` is not used."""
        return {
            "text": self._retrieve_text(query),
            "images": self._retrieve_images(query),
            "code": self._retrieve_code(query),
        }

    def _retrieve_text(self, query: str) -> List[Any]:
        """Run `query` through the text retriever."""
        text_hits = self.text_retriever.invoke(query)
        return text_hits

    def _retrieve_images(self, query: str) -> List[str]:
        """Encode `query` with CLIP, then return two fixed placeholder file names."""
        encoded = self.clip_processor(text=query, return_tensors="pt")
        with torch.no_grad():
            # The text features are computed but discarded; no image search happens here.
            self.clip_model.get_text_features(**encoded)
        return ["image_result_1.png", "image_result_2.png"]

    def _retrieve_code(self, query: str) -> List[str]:
        """Run `query` through the code retriever tool."""
        code_hits = self.code_retriever.invoke(query)
        return code_hits
# ------------------------------
# Research Workflow
# ------------------------------
class ResearchWorkflow:
    """
    Defines the multi-step research workflow using a state graph.

    Flow: ingest -> retrieve -> analyze, then the quality gate routes to
    validate -> enhance -> END, to refine -> retrieve (another pass), or
    straight to END when a node reported an error.
    """

    # Domain assumed when the incoming state does not name one.
    DEFAULT_DOMAIN = "Biomedical Research"
    # Refinement passes allowed before the quality gate forces a "valid" verdict.
    MAX_REFINEMENTS = 3

    def __init__(self) -> None:
        """Create the processor, assemble the graph, and compile it into `self.app`."""
        self.processor = EnhancedCognitiveProcessor()
        self.workflow = StateGraph(AgentState)
        self._build_workflow()
        self.app = self.workflow.compile()

    def _build_workflow(self) -> None:
        """Register the nodes and wire the edges of the workflow graph."""
        graph = self.workflow
        graph.add_node("ingest", self.ingest_query)
        graph.add_node("retrieve", self.retrieve_documents)
        graph.add_node("analyze", self.analyze_content)
        graph.add_node("validate", self.validate_output)
        graph.add_node("refine", self.refine_results)
        # Extended node for multi-modal enhancement
        graph.add_node("enhance", self.enhance_analysis)

        graph.set_entry_point("ingest")
        graph.add_edge("ingest", "retrieve")
        graph.add_edge("retrieve", "analyze")
        graph.add_conditional_edges(
            "analyze",
            self._quality_check,
            # "error" ends the run; without it an error state (which resets the
            # refine counter) kept cycling until the recursion limit was hit.
            {"valid": "validate", "invalid": "refine", "error": END}
        )
        graph.add_edge("refine", "retrieve")
        # `validate` feeds only `enhance`; the original also wired validate -> END,
        # which duplicated the terminal edge.
        graph.add_edge("validate", "enhance")
        graph.add_edge("enhance", END)

    @staticmethod
    def _response_content(response: Dict) -> str:
        """Return the first choice's message content of a backend response, or ""."""
        choices = response.get('choices') or [{}]
        return (choices[0].get('message') or {}).get('content', '') or ''

    def ingest_query(self, state: AgentState) -> Dict:
        """
        Read the latest message as the query and start a fresh context.

        The domain is taken from ``state["context"]["domain"]`` (where the UI puts
        it), then from a top-level ``"domain"`` key, then from `DEFAULT_DOMAIN`.
        """
        try:
            query = state["messages"][-1].content
            incoming = state.get("context") or {}
            domain = incoming.get("domain") or state.get("domain", self.DEFAULT_DOMAIN)
            new_context = {"raw_query": query, "domain": domain, "refine_count": 0, "refinement_history": []}
            logger.info(f"Query ingested. Domain: {domain}")
            return {
                "messages": [AIMessage(content="Query ingested successfully")],
                "context": new_context,
                "metadata": {"timestamp": datetime.now().isoformat()}
            }
        except Exception as e:
            logger.exception("Error during query ingestion.")
            return self._error_state(f"Ingestion Error: {str(e)}")

    def retrieve_documents(self, state: AgentState) -> Dict:
        """
        Fetch documents for the stored query and add them to the context.

        The existing context (including ``raw_query``) is carried over so that a
        later refine -> retrieve pass still finds the query.
        """
        try:
            context = state["context"]
            query = context["raw_query"]
            domain = context.get("domain", self.DEFAULT_DOMAIN)
            docs = retriever.retrieve(query, domain)
            logger.info(f"Retrieved {len(docs)} documents for query.")
            return {
                "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
                "context": {
                    **context,
                    "documents": docs,
                    "retrieval_time": time.time(),
                    "refine_count": context.get("refine_count", 0),
                    "refinement_history": context.get("refinement_history", []),
                    "domain": domain
                }
            }
        except Exception as e:
            logger.exception("Error during document retrieval.")
            return self._error_state(f"Retrieval Error: {str(e)}")

    def analyze_content(self, state: AgentState) -> Dict:
        """
        Analyzes the retrieved documents using a domain-specific fallback analysis.
        If the domain matches one of the predefined domains in
        ``ResearchConfig.DOMAIN_FALLBACKS``, that canned analysis is returned.
        Otherwise, the normal backend analysis pipeline is used.
        """
        try:
            domain = state["context"].get("domain", self.DEFAULT_DOMAIN).lower()
            # Single source of truth: the canned texts live in ResearchConfig.
            fallback = ResearchConfig.DOMAIN_FALLBACKS.get(domain)
            if fallback is not None:
                logger.info(f"Using fallback analysis for domain: {domain}")
                return {
                    "messages": [AIMessage(content=fallback.strip())],
                    "context": state["context"]
                }

            docs = state["context"].get("documents", [])
            docs_text = "\n\n".join(d.page_content for d in docs)
            domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
            full_prompt = f"{domain_prompt}\n\n" + ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
            response = self.processor.process_query(full_prompt)
            if "error" in response:
                logger.error("Backend response error during analysis.")
                return self._error_state(response["error"])
            logger.info("Content analysis completed.")
            return {
                "messages": [AIMessage(content=self._response_content(response))],
                "context": state["context"]
            }
        except Exception as e:
            logger.exception("Error during content analysis.")
            return self._error_state(f"Analysis Error: {str(e)}")

    def validate_output(self, state: AgentState) -> Dict:
        """Ask the backend for a VALID/INVALID verdict and append it to the analysis."""
        try:
            analysis = state["messages"][-1].content
            validation_prompt = (
                f"Validate research analysis:\n{analysis}\n\n"
                "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
                "Respond with 'VALID' or 'INVALID'"
            )
            response = self.processor.process_query(validation_prompt)
            logger.info("Output validation completed.")
            verdict = self._response_content(response)
            return {
                "messages": [AIMessage(content=analysis + f"\n\nValidation: {verdict}")]
            }
        except Exception as e:
            logger.exception("Error during output validation.")
            return self._error_state(f"Validation Error: {str(e)}")

    def refine_results(self, state: AgentState) -> Dict:
        """
        Run one refinement pass over the latest analysis.

        The pass counter and history are updated in a new context dict. On the
        pass that reaches `MAX_REFINEMENTS`, the backend is asked to summarise
        the whole history instead of refining again.
        """
        try:
            context = state["context"]
            refine_count = context.get("refine_count", 0) + 1
            current_analysis = state["messages"][-1].content
            refinement_history = [*context.get("refinement_history", []), current_analysis]
            # Build a fresh dict rather than mutating the incoming state in place.
            new_context = {
                **context,
                "refine_count": refine_count,
                "refinement_history": refinement_history
            }
            difficulty_level = max(0, self.MAX_REFINEMENTS - refine_count)
            logger.info(f"Refinement iteration: {refine_count}, Difficulty level: {difficulty_level}")

            if refine_count >= self.MAX_REFINEMENTS:
                meta_prompt = (
                    "You are given the following series of refinement outputs:\n" +
                    "\n---\n".join(refinement_history) +
                    "\n\nSummarize the above into a final, concise, and high-quality technical analysis report. Do not introduce new ideas; just synthesize the improvements."
                )
                meta_response = self.processor.process_query(meta_prompt)
                logger.info("Meta-refinement completed.")
                return {
                    "messages": [AIMessage(content=self._response_content(meta_response))],
                    "context": new_context
                }

            refinement_prompt = (
                f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
                "Improve the following aspects:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence\n\n"
                "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant."
            )
            response = self.processor.process_query(refinement_prompt)
            logger.info("Refinement completed.")
            return {
                "messages": [AIMessage(content=self._response_content(response))],
                "context": new_context
            }
        except Exception as e:
            logger.exception("Error during refinement.")
            return self._error_state(f"Refinement Error: {str(e)}")

    def _quality_check(self, state: AgentState) -> str:
        """
        Route after `analyze`: return "error", "valid", or "invalid".

        "error" when the context carries an error flag; "valid" when the refinement
        limit is reached or the latest message contains the standalone word VALID
        ("INVALID" does not count); otherwise "invalid".
        """
        context = state["context"]
        if context.get("error"):
            logger.error("Workflow is in an error state. Ending run.")
            return "error"
        refine_count = context.get("refine_count", 0)
        if refine_count >= self.MAX_REFINEMENTS:
            logger.warning("Refinement limit reached. Forcing valid outcome.")
            return "valid"
        content = state["messages"][-1].content
        # Word boundary: a plain substring test also matched "INVALID".
        quality = "valid" if re.search(r"\bVALID\b", content) else "invalid"
        logger.info(f"Quality check returned: {quality}")
        return quality

    def _error_state(self, message: str) -> Dict:
        """Log `message` and return a state update that flags the error."""
        logger.error(message)
        return {
            "messages": [AIMessage(content=f"❌ {message}")],
            "context": {"error": True},
            "metadata": {"status": "error"}
        }

    def enhance_analysis(self, state: AgentState) -> Dict:
        """Append a multi-modal section listing any "images" and "code" found in the context."""
        try:
            context = state["context"]
            analysis = state["messages"][-1].content
            parts = [f"{analysis}\n\n## Multi-Modal Insights\n"]
            if "images" in context:
                parts.append("### Visual Evidence\n")
                parts.extend(f"![Relevant visual]({img})\n" for img in context["images"])
            if "code" in context:
                parts.append("### Code Artifacts\n```python\n")
                parts.extend(f"{code}\n" for code in context["code"])
                parts.append("```")
            return {
                "messages": [AIMessage(content="".join(parts))],
                "context": context
            }
        except Exception as e:
            logger.exception("Error during multi-modal enhancement.")
            return self._error_state(f"Enhancement Error: {str(e)}")
# ------------------------------
# Streamlit Research Interface
# ------------------------------
class ResearchInterface:
    """
    Provides the Streamlit-based interface for executing the research workflow.

    Creating an instance builds the workflow and renders the page (styles,
    sidebar, and main query form).
    """

    def __init__(self) -> None:
        """Build the workflow, then draw the interface."""
        self.workflow = ResearchWorkflow()
        self._initialize_interface()

    def _initialize_interface(self) -> None:
        """Configure the page and render styles, sidebar, and main area in that order."""
        st.set_page_config(
            page_title="NeuroResearch AI",
            layout="wide",
            initial_sidebar_state="expanded"
        )
        self._inject_styles()
        self._build_sidebar()
        self._build_main_interface()

    def _inject_styles(self) -> None:
        """Inject the dark-theme CSS used by the app."""
        st.markdown(
            """
<style>
:root {
--primary: #2ecc71;
--secondary: #3498db;
--background: #0a0a0a;
--text: #ecf0f1;
}
.stApp {
background: var(--background);
color: var(--text);
font-family: 'Roboto', sans-serif;
}
.stTextArea textarea {
background: #1a1a1a !important;
color: var(--text) !important;
border: 2px solid var(--secondary);
border-radius: 8px;
padding: 1rem;
}
.stButton>button {
background: linear-gradient(135deg, var(--primary), var(--secondary));
border: none;
border-radius: 8px;
padding: 1rem 2rem;
transition: all 0.3s;
}
.stButton>button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(46, 204, 113, 0.3);
}
.stExpander {
background: #1a1a1a;
border: 1px solid #2a2a2a;
border-radius: 8px;
margin: 1rem 0;
}
</style>
""",
            unsafe_allow_html=True
        )

    def _build_sidebar(self) -> None:
        """Render the document list, two metrics, and the collaboration hub in the sidebar."""
        with st.sidebar:
            st.title("🔍 Research Database")
            st.subheader("Technical Papers")
            for title, short in ResearchConfig.DOCUMENT_MAP.items():
                with st.expander(short):
                    st.markdown(f"```\n{title}\n```")
            st.subheader("Analysis Metrics")
            st.metric("Vector Collections", 2)
            st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
            with st.sidebar.expander("Collaboration Hub"):
                st.subheader("Live Research Team")
                st.write("👩💻 Researcher A")
                st.write("👨🔬 Researcher B")
                st.write("🤖 AI Assistant")
                st.subheader("Knowledge Graph")
                if st.button("🕸 View Current Graph"):
                    self._display_knowledge_graph()

    def _build_main_interface(self) -> None:
        """Render the query box, the domain selector, and the run button."""
        st.title("🧠 NeuroResearch AI")
        query = st.text_area("Research Query:", height=200, placeholder="Enter technical research question...")
        domain = st.selectbox(
            "Select Research Domain:",
            options=[
                "Biomedical Research",
                "Legal Research",
                "Environmental and Energy Studies",
                "Competitive Programming and Theoretical Computer Science",
                "Social Sciences"
            ],
            index=0
        )
        if st.button("Execute Analysis", type="primary"):
            self._execute_analysis(query, domain)

    def _execute_analysis(self, query: str, domain: str) -> None:
        """
        Stream the workflow for `query` in `domain` and render each event.

        A blank query is rejected with a warning before any work starts. Any
        exception from the workflow is logged and shown as an error panel.
        """
        if not query or not query.strip():
            st.warning("Please enter a research query before running the analysis.")
            return
        try:
            with st.spinner("Initializing Quantum Analysis..."):
                results = self.workflow.app.stream(
                    {
                        "messages": [HumanMessage(content=query)],
                        "context": {"domain": domain},
                        "metadata": {}
                    },
                    {"recursion_limit": 100}
                )
                for event in results:
                    self._render_event(event)
            st.success("✅ Analysis Completed Successfully")
        except Exception as e:
            logger.exception("Workflow execution failed.")
            st.error(
                f"""**Analysis Failed**
{str(e)}
Potential issues:
- Complex query structure
- Document correlation failure
- Temporal processing constraints"""
            )

    @staticmethod
    def _validation_passed(content: str) -> bool:
        """
        Return True when the verdict in `content` is VALID and not INVALID.

        Only the text after the last "Validation:" marker is inspected (the whole
        string when there is no marker), so the analysis body cannot influence
        the verdict. Matching is on whole words because "INVALID" contains "VALID".
        """
        _, marker, tail = content.rpartition("Validation:")
        verdict = tail if marker else content
        if re.search(r"\bINVALID\b", verdict):
            return False
        return re.search(r"\bVALID\b", verdict) is not None

    def _render_event(self, event: Dict) -> None:
        """Render one streamed workflow event; events from other nodes are ignored."""
        if 'ingest' in event:
            with st.container():
                st.success("✅ Query Ingested")
        elif 'retrieve' in event:
            with st.container():
                docs = event['retrieve'].get('context', {}).get('documents', [])
                st.info(f"📚 Retrieved {len(docs)} documents")
                with st.expander("View Retrieved Documents", expanded=False):
                    for idx, doc in enumerate(docs, start=1):
                        st.markdown(f"**Document {idx}**")
                        st.code(doc.page_content, language='text')
        elif 'analyze' in event:
            with st.container():
                content = event['analyze']['messages'][0].content
                with st.expander("Technical Analysis Report", expanded=True):
                    st.markdown(content)
        elif 'validate' in event:
            with st.container():
                content = event['validate']['messages'][0].content
                if self._validation_passed(content):
                    st.success("✅ Validation Passed")
                    with st.expander("View Validated Analysis", expanded=True):
                        # Show the analysis without the trailing verdict line.
                        analysis, marker, _ = content.rpartition("Validation:")
                        st.markdown(analysis if marker else content)
                else:
                    st.warning("⚠️ Validation Issues Detected")
                    with st.expander("View Validation Details", expanded=True):
                        st.markdown(content)
        elif 'enhance' in event:
            with st.container():
                content = event['enhance']['messages'][0].content
                with st.expander("Enhanced Multi-Modal Analysis Report", expanded=True):
                    st.markdown(content)

    def _display_knowledge_graph(self) -> None:
        """Placeholder: tells the user the graph view is not available yet."""
        st.write("Knowledge Graph visualization is not implemented yet.")
# ------------------------------
# Multi-Modal Retriever Initialization
# ------------------------------
from transformers import CLIPProcessor, CLIPModel
# Load the CLIP model and processor named in ResearchConfig.CLIP_SETTINGS. These
# module-level names are also read by ExtendedQuantumDocumentManager.create_image_collection.
clip_model = CLIPModel.from_pretrained(ResearchConfig.CLIP_SETTINGS["model"])
clip_processor = CLIPProcessor.from_pretrained(ResearchConfig.CLIP_SETTINGS["model"])
# Hybrid retriever built on the research text retriever and the CLIP pair above.
multi_retriever = MultiModalRetriever(retriever.research_retriever, clip_model, clip_processor)
# ------------------------------
# Execute the Application
# ------------------------------
class ResearchInterfaceExtended(ResearchInterface):
    """Extended with domain adaptability, collaboration, and graph visualization."""

    def _build_main_interface(self) -> None:
        """Render the main area; currently this only delegates to the parent implementation."""
        super()._build_main_interface()
# Build and render the interface only when this file runs as the main script.
if __name__ == "__main__":
    ResearchInterfaceExtended()