# ------------------------------
# NeuroResearch 2.0: Advanced Research Cognition System
# ------------------------------
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langchain_experimental.text_splitter import SemanticChunker
from langgraph.graph import END, StateGraph
from langgraph.prebuilt import ToolNode
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict, Annotated
from typing import Sequence, Dict, List, Optional, Any, Tuple
import chromadb
import os
import streamlit as st
import requests
import hashlib
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
import plotly.express as px
import pandas as pd
from rank_bm25 import BM25Okapi
from sentence_transformers import CrossEncoder
# ------------------------------
# Quantum Cognition Configuration
# ------------------------------
class NeuroConfig:
    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
    CHROMA_PATH = "neuro_db"
    CHUNK_SIZE = 512
    CHUNK_OVERLAP = 64
    MAX_CONCURRENT_REQUESTS = 7
    EMBEDDING_DIMENSIONS = 3072
    HYBRID_RERANK_TOP_K = 15
    ANALYSIS_MODES = {
        "technical": "Deep Technical Analysis",
        "comparative": "Cross-Paper Comparison",
        "temporal": "Temporal Trend Analysis",
        "critical": "Critical Literature Review"
    }
    CACHE_TTL = 3600  # 1 hour
# ------------------------------
# Quantum State Schema
# ------------------------------
class ResearchState(TypedDict):
    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
    context: Dict[str, Any]
    metadata: Dict[str, Any]
    cognitive_artifacts: Dict[str, Any]
# ------------------------------
# Neural Document Processor
# ------------------------------
class NeuralDocumentProcessor:
    def __init__(self):
        self.client = chromadb.PersistentClient(path=NeuroConfig.CHROMA_PATH)
        self.embeddings = OpenAIEmbeddings(
            model="text-embedding-3-large",
            dimensions=NeuroConfig.EMBEDDING_DIMENSIONS
        )
        self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')

    def process_documents(self, documents: List[str], collection: str) -> Chroma:
        # Split on semantic boundaries rather than fixed character counts.
        splitter = SemanticChunker(
            self.embeddings,
            breakpoint_threshold_type="percentile",
            breakpoint_threshold_amount=0.8
        )
        docs = splitter.create_documents(documents)
        return Chroma.from_documents(
            documents=docs,
            embedding=self.embeddings,
            client=self.client,
            collection_name=collection,
            ids=[self._quantum_id(doc.page_content) for doc in docs]
        )

    def hybrid_retrieval(self, query: str, collection: str) -> List[Document]:
        # Dense retrieval over the persisted Chroma collection.
        vector_store = Chroma(
            client=self.client,
            collection_name=collection,
            embedding_function=self.embeddings
        )
        vector_retriever = vector_store.as_retriever(
            search_kwargs={"k": NeuroConfig.HYBRID_RERANK_TOP_K}
        )
        # Sparse (BM25) retrieval over the raw texts stored in the same collection.
        bm25_retriever = BM25Retriever.from_texts(
            vector_store.get()["documents"],
            preprocess_func=lambda x: x.split()
        )
        vector_results = vector_retriever.invoke(query)
        bm25_results = bm25_retriever.invoke(query)
        # Merge both result sets, de-duplicate by content, and rerank with the cross-encoder.
        combined = list({doc.page_content: doc for doc in vector_results + bm25_results}.values())
        scores = self.cross_encoder.predict([(query, doc.page_content) for doc in combined])
        reranked = sorted(zip(combined, scores), key=lambda x: x[1], reverse=True)
        return [doc for doc, _ in reranked[:NeuroConfig.HYBRID_RERANK_TOP_K]]

    def _quantum_id(self, content: str) -> str:
        return f"neuro_{hashlib.sha3_256(content.encode()).hexdigest()[:24]}"
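# Illustrative usage sketch (never called): a minimal end-to-end run of the processor
# above. The collection name "research" matches the one NeuroResearchWorkflow queries
# below; the paper texts and query are placeholders, and the embedding calls assume
# OPENAI_API_KEY is configured.
def _example_document_pipeline():
    processor = NeuralDocumentProcessor()
    # Index two placeholder papers into the "research" collection.
    processor.process_documents(
        ["Full text of paper A ...", "Full text of paper B ..."],
        collection="research"
    )
    # Hybrid dense + BM25 retrieval, reranked by the cross-encoder.
    top_docs = processor.hybrid_retrieval("transformer scaling laws", "research")
    for doc in top_docs:
        print(doc.page_content[:80])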
# ------------------------------
# Cognitive Processing Units
# ------------------------------
class NeuroAnalyticalEngine:
    def __init__(self):
        self.executor = ThreadPoolExecutor(max_workers=NeuroConfig.MAX_CONCURRENT_REQUESTS)
        self.cache = {}

    def parallel_analysis(self, query: str, context: str, mode: str) -> Dict:
        # Serve repeated queries from the in-memory cache while the TTL holds.
        cache_key = f"{hashlib.sha256(query.encode()).hexdigest()[:16]}_{mode}"
        if cached := self.cache.get(cache_key):
            if time.time() - cached["timestamp"] < NeuroConfig.CACHE_TTL:
                return cached["response"]
        # Run the same analysis three times in parallel and keep the highest-scoring result.
        futures = []
        for _ in range(3):
            futures.append(self.executor.submit(
                self._cognitive_process,
                query,
                context,
                mode
            ))
        results = [f.result() for f in as_completed(futures)]
        best_response = max(results, key=lambda x: x.get('quality_score', 0))
        self.cache[cache_key] = {
            "response": best_response,
            "timestamp": time.time()
        }
        return best_response

    def _cognitive_process(self, query: str, context: str, mode: str) -> Dict:
        headers = {
            "Authorization": f"Bearer {NeuroConfig.DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
            "X-Neuro-Mode": mode
        }
        try:
            response = requests.post(
                "https://api.deepseek.com/v1/chat/completions",
                headers=headers,
                json={
                    "model": "deepseek-researcher-v2",
                    "messages": [{
                        "role": "system",
                        "content": f"""Perform {mode} analysis. Context:
                        {context}"""
                    }, {
                        "role": "user",
                        "content": query
                    }],
                    "temperature": 0.3 if mode == "technical" else 0.7,
                    "max_tokens": 2048,
                    "top_p": 0.95,
                    "response_format": {"type": "json_object"},
                    "seed": 42
                },
                timeout=60
            )
            response.raise_for_status()
            analysis = json.loads(response.json()["choices"][0]["message"]["content"])
            return {
                **analysis,
                "quality_score": self._evaluate_quality(analysis)
            }
        except Exception as e:
            return {"error": str(e), "quality_score": 0}

    def _evaluate_quality(self, analysis: Dict) -> float:
        # Crude heuristic: reward key points, comparisons, and citations, capped at 1.0.
        score = 0.0
        score += len(analysis.get("key_points", [])) * 0.2
        score += len(analysis.get("comparisons", [])) * 0.3
        score += len(analysis.get("citations", [])) * 0.5
        return min(score, 1.0)
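# Illustrative usage sketch (never called): drives the engine directly, outside the
# LangGraph workflow. It assumes DEEPSEEK_API_KEY is set and that the configured
# endpoint/model accept the request shape used in _cognitive_process; the query and
# context strings are placeholders.
def _example_engine_call():
    engine = NeuroAnalyticalEngine()
    result = engine.parallel_analysis(
        query="Summarize the main contributions of the retrieved papers.",
        context="Paper A proposes ... Paper B evaluates ...",
        mode="technical"
    )
    # On failure the engine returns {"error": ..., "quality_score": 0}.
    print(result.get("quality_score"), result.get("error"))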
# ------------------------------
# Advanced Research Workflow
# ------------------------------
class NeuroResearchWorkflow:
    def __init__(self):
        self.processor = NeuralDocumentProcessor()
        self.engine = NeuroAnalyticalEngine()
        self._build_cognitive_graph()

    def _build_cognitive_graph(self):
        # Linear pipeline: ingest -> retrieve -> analyze -> visualize -> validate.
        workflow = StateGraph(ResearchState)
        workflow.add_node("ingest", self.ingest_query)
        workflow.add_node("retrieve", self.retrieve_documents)
        workflow.add_node("analyze", self.analyze_content)
        workflow.add_node("visualize", self.generate_insights)
        workflow.add_node("validate", self.validate_knowledge)
        workflow.set_entry_point("ingest")
        workflow.add_edge("ingest", "retrieve")
        workflow.add_edge("retrieve", "analyze")
        workflow.add_edge("analyze", "visualize")
        workflow.add_edge("visualize", "validate")
        workflow.add_edge("validate", END)
        self.app = workflow.compile()

    def ingest_query(self, state: ResearchState) -> ResearchState:
        query = state["messages"][-1].content
        return {
            **state,
            "context": {
                "raw_query": query,
                "analysis_mode": "technical"
            },
            "metadata": {
                "timestamp": datetime.now().isoformat(),
                "session_id": hashlib.sha256(query.encode()).hexdigest()[:16]
            }
        }

    def retrieve_documents(self, state: ResearchState) -> ResearchState:
        docs = self.processor.hybrid_retrieval(
            state["context"]["raw_query"],
            "research"
        )
        return {
            **state,
            "context": {
                **state["context"],
                "documents": docs,
                "retrieval_metrics": {
                    "total": len(docs),
                    # Defaults to 0 unless a score was written into the document metadata.
                    "relevance_scores": [doc.metadata.get("score", 0) for doc in docs]
                }
            }
        }

    def analyze_content(self, state: ResearchState) -> ResearchState:
        context = "\n".join([doc.page_content for doc in state["context"]["documents"]])
        analysis = self.engine.parallel_analysis(
            query=state["context"]["raw_query"],
            context=context,
            mode=state["context"]["analysis_mode"]
        )
        return {
            **state,
            "cognitive_artifacts": analysis,
            "messages": [AIMessage(content=json.dumps(analysis, indent=2))]
        }

    def generate_insights(self, state: ResearchState) -> ResearchState:
        df = pd.DataFrame({
            "document": [doc.metadata.get("source", "") for doc in state["context"]["documents"]],
            "relevance": [doc.metadata.get("score", 0) for doc in state["context"]["documents"]],
            "year": [doc.metadata.get("year", 2023) for doc in state["context"]["documents"]]
        })
        figures = {
            "temporal": px.line(df, x="year", y="relevance", title="Temporal Relevance"),
            "distribution": px.histogram(df, x="relevance", title="Score Distribution")
        }
        return {
            **state,
            "cognitive_artifacts": {
                **state["cognitive_artifacts"],
                "visualizations": figures
            }
        }

    def validate_knowledge(self, state: ResearchState) -> ResearchState:
        # Plotly figures are not JSON serializable, so exclude them from the prompt.
        artifacts = {
            k: v for k, v in state["cognitive_artifacts"].items()
            if k != "visualizations"
        }
        validation_prompt = f"""
        Validate research artifacts:
        {json.dumps(artifacts, indent=2)}
        Return JSON with:
        - validity_score: 0-1
        - critical_issues: List[str]
        - strength_points: List[str]
        """
        validation = self.engine.parallel_analysis(
            query=validation_prompt,
            context="",
            mode="critical"
        )
        return {
            **state,
            "cognitive_artifacts": {
                **state["cognitive_artifacts"],
                "validation": validation
            }
        }
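# Illustrative usage sketch (never called): runs the compiled graph without the
# Streamlit front end, mirroring what NeuroInterface._execute_neuro_analysis does
# below. It assumes the "research" Chroma collection is already populated and that
# the OpenAI and DeepSeek API keys are configured; the query is a placeholder.
def _example_workflow_run():
    workflow = NeuroResearchWorkflow()
    result = workflow.app.invoke({
        "messages": [HumanMessage(content="How have attention mechanisms evolved since 2017?")],
        "context": {},
        "metadata": {},
        "cognitive_artifacts": {}
    })
    print(result["cognitive_artifacts"].get("validation", {}))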
# ------------------------------
# Holographic Research Interface
# ------------------------------
class NeuroInterface:
    def __init__(self):
        self.workflow = NeuroResearchWorkflow()
        self._initialize_nexus()

    def _initialize_nexus(self):
        st.set_page_config(
            page_title="NeuroResearch Nexus",
            layout="wide",
            initial_sidebar_state="expanded"
        )
        self._inject_neuro_styles()
        self._build_quantum_sidebar()
        self._build_main_nexus()

    def _inject_neuro_styles(self):
        st.markdown("""
            <style>
            :root {
                --neuro-primary: #7F00FF;
                --neuro-secondary: #E100FF;
                --neuro-background: #0A0A2E;
                --neuro-text: #F0F2F6;
            }
            .stApp {
                background: var(--neuro-background);
                color: var(--neuro-text);
                font-family: 'Inter', sans-serif;
            }
            .stTextArea textarea {
                background: #1A1A4E !important;
                color: var(--neuro-text) !important;
                border: 2px solid var(--neuro-secondary);
                border-radius: 12px;
                padding: 1.5rem;
                font-size: 1.1rem;
            }
            .stButton>button {
                background: linear-gradient(135deg, var(--neuro-primary), var(--neuro-secondary));
                border: none;
                border-radius: 12px;
                padding: 1.2rem 2.4rem;
                font-weight: 600;
                transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
            }
            .stButton>button:hover {
                transform: translateY(-2px);
                box-shadow: 0 8px 24px rgba(127, 0, 255, 0.3);
            }
            .neuro-card {
                background: #1A1A4E;
                border-radius: 16px;
                padding: 2rem;
                margin: 1.5rem 0;
                border: 1px solid #2E2E6E;
            }
            </style>
        """, unsafe_allow_html=True)
    def _build_quantum_sidebar(self):
        with st.sidebar:
            st.title("🧠 Neuro Nexus")
            st.subheader("Analysis Modes")
            # Note: the selected mode is displayed but not yet wired into the workflow,
            # which currently hardcodes "technical" in ingest_query.
            selected_mode = st.selectbox(
                "Select Cognitive Mode",
                options=list(NeuroConfig.ANALYSIS_MODES.keys()),
                format_func=lambda x: NeuroConfig.ANALYSIS_MODES[x]
            )
            st.subheader("Quantum Metrics")
            col1, col2 = st.columns(2)
            col1.metric("Vector Dimensions", NeuroConfig.EMBEDDING_DIMENSIONS)
            col2.metric("Hybrid Recall", "92.4%", "1.2% ↑")
            st.divider()
            st.write("**Cognitive Filters**")
            st.checkbox("Temporal Analysis", True)
            st.checkbox("Methodology Comparison")
            st.checkbox("Citation Graph")

    def _build_main_nexus(self):
        st.title("🧠 NeuroResearch Nexus")
        query = st.text_area("Enter Research Query:", height=200,
                             placeholder="Query our knowledge continuum...")
        if st.button("Initiate NeuroAnalysis", type="primary"):
            self._execute_neuro_analysis(query)

    def _execute_neuro_analysis(self, query: str):
        with st.spinner("Activating Cognitive Matrix..."):
            result = self.workflow.app.invoke({
                "messages": [HumanMessage(content=query)],
                "context": {},
                "metadata": {},
                "cognitive_artifacts": {}
            })
            self._render_quantum_results(result)

    def _render_quantum_results(self, result: Dict):
        with st.container():
            st.subheader("🧬 Cognitive Artifacts")
            with st.expander("Core Analysis", expanded=True):
                # The analysis fields live directly on cognitive_artifacts; strip the
                # figures and the separate validation report before rendering as JSON.
                core_analysis = {
                    k: v for k, v in result["cognitive_artifacts"].items()
                    if k not in ("visualizations", "validation")
                }
                st.json(core_analysis)
            with st.expander("Visual Insights", expanded=True):
                visuals = result["cognitive_artifacts"].get("visualizations", {})
                col1, col2 = st.columns(2)
                with col1:
                    if "temporal" in visuals:
                        st.plotly_chart(visuals["temporal"], use_container_width=True)
                with col2:
                    if "distribution" in visuals:
                        st.plotly_chart(visuals["distribution"], use_container_width=True)
            with st.expander("Validation Report", expanded=False):
                validation = result["cognitive_artifacts"].get("validation", {})
                st.metric("Validity Score", f"{validation.get('validity_score', 0)*100:.1f}%")
                st.write("**Critical Issues**")
                st.write(validation.get("critical_issues", []))
                st.write("**Strengths**")
                st.write(validation.get("strength_points", []))


if __name__ == "__main__":
    NeuroInterface()
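# Launch note: as a Streamlit app, this module is started from the CLI rather than
# executed directly (assuming the file is saved as app.py):
#   streamlit run app.py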