Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Files Community

NeuroResearch_AI / app.py

mgbam

Update app.py

e1707aa verified about 1 month ago

raw

history blame

17.5 kB

	# ------------------------------
	# Imports & Initial Configuration
	# ------------------------------
	import streamlit as st
	# Set the page configuration immediately—this must be the first Streamlit command.
	st.set_page_config(page_title="NeuroResearch AI", layout="wide", initial_sidebar_state="expanded")

	from langchain_openai import OpenAIEmbeddings
	from langchain_community.vectorstores import Chroma
	from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langgraph.graph import END, StateGraph
	from langgraph.prebuilt import ToolNode
	from langgraph.graph.message import add_messages
	from typing_extensions import TypedDict, Annotated
	from typing import Sequence, Dict, List, Optional, Any
	import chromadb
	import re
	import os
	import requests
	import hashlib
	import json
	import time
	from langchain.tools.retriever import create_retriever_tool
	from concurrent.futures import ThreadPoolExecutor, as_completed
	from datetime import datetime

	# ------------------------------
	# State Schema Definition
	# ------------------------------
	class AgentState(TypedDict):
	messages: Annotated[Sequence[AIMessage \| HumanMessage \| ToolMessage], add_messages]
	context: Dict[str, Any]
	metadata: Dict[str, Any]

	# ------------------------------
	# Configuration
	# ------------------------------
	class ResearchConfig:
	DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
	CHROMA_PATH = "chroma_db"
	CHUNK_SIZE = 512
	CHUNK_OVERLAP = 64
	MAX_CONCURRENT_REQUESTS = 5
	EMBEDDING_DIMENSIONS = 1536
	DOCUMENT_MAP = {
	"Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
	"CV-Transformer Hybrid Architecture",
	"Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
	"Transformer Architecture Analysis",
	"Latest Trends in Machine Learning Methods Using Quantum Computing":
	"Quantum ML Frontiers"
	}
	ANALYSIS_TEMPLATE = """Analyze these technical documents with scientific rigor:
	{context}

	Respond with:
	1. Key Technical Contributions (bullet points)
	2. Novel Methodologies
	3. Empirical Results (with metrics)
	4. Potential Applications
	5. Limitations & Future Directions

	Format: Markdown with LaTeX mathematical notation where applicable
	"""

	# Validate API key configuration
	if not ResearchConfig.DEEPSEEK_API_KEY:
	st.error("""Research Portal Configuration Required
	1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
	2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
	3. Rebuild deployment""")
	st.stop()

	# ------------------------------
	# Quantum Document Processing
	# ------------------------------
	class QuantumDocumentManager:
	def __init__(self):
	self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
	self.embeddings = OpenAIEmbeddings(
	model="text-embedding-3-large",
	dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
	)

	def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
	splitter = RecursiveCharacterTextSplitter(
	chunk_size=ResearchConfig.CHUNK_SIZE,
	chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
	separators=["\n\n", "\n", "\|\|\|"]
	)
	docs = splitter.create_documents(documents)
	# Debug: log the number of chunks created for the collection.
	st.write(f"Created {len(docs)} chunks for collection '{collection_name}'")
	return Chroma.from_documents(
	documents=docs,
	embedding=self.embeddings,
	client=self.client,
	collection_name=collection_name,
	ids=[self._document_id(doc.page_content) for doc in docs]
	)

	def _document_id(self, content: str) -> str:
	return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"

	# Initialize document collections
	qdm = QuantumDocumentManager()
	research_docs = qdm.create_collection([
	"Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
	"Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
	"Latest Trends in Machine Learning Methods Using Quantum Computing"
	], "research")

	development_docs = qdm.create_collection([
	"Project A: UI Design Completed, API Integration in Progress",
	"Project B: Testing New Feature X, Bug Fixes Needed",
	"Product Y: In the Performance Optimization Stage Before Release"
	], "development")

	# ------------------------------
	# Advanced Retrieval System
	# ------------------------------
	class ResearchRetriever:
	def __init__(self):
	self.retrievers = {
	"research": research_docs.as_retriever(
	search_type="mmr",
	search_kwargs={
	'k': 4,
	'fetch_k': 20,
	'lambda_mult': 0.85
	}
	),
	"development": development_docs.as_retriever(
	search_type="similarity",
	search_kwargs={'k': 3}
	)
	}

	def retrieve(self, query: str, domain: str) -> List[Any]:
	try:
	results = self.retrievers[domain].invoke(query)
	st.write(f"[DEBUG] Retrieved {len(results)} documents for query: '{query}' in domain '{domain}'")
	return results
	except KeyError:
	st.error(f"[ERROR] Retrieval domain '{domain}' not found.")
	return []

	retriever = ResearchRetriever()

	# ------------------------------
	# Cognitive Processing Unit
	# ------------------------------
	class CognitiveProcessor:
	def __init__(self):
	self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
	self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]

	def process_query(self, prompt: str) -> Dict:
	futures = []
	for _ in range(3): # Triple redundancy for robustness
	futures.append(self.executor.submit(
	self._execute_api_request,
	prompt
	))

	results = []
	for future in as_completed(futures):
	try:
	results.append(future.result())
	except Exception as e:
	st.error(f"Processing Error: {str(e)}")

	return self._consensus_check(results)

	def _execute_api_request(self, prompt: str) -> Dict:
	headers = {
	"Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
	"Content-Type": "application/json",
	"X-Research-Session": self.session_id
	}

	try:
	response = requests.post(
	"https://api.deepseek.com/v1/chat/completions",
	headers=headers,
	json={
	"model": "deepseek-chat",
	"messages": [{
	"role": "user",
	"content": f"Respond as Senior AI Researcher:\n{prompt}"
	}],
	"temperature": 0.7,
	"max_tokens": 1500,
	"top_p": 0.9
	},
	timeout=45
	)
	response.raise_for_status()
	return response.json()
	except requests.exceptions.RequestException as e:
	return {"error": str(e)}

	def _consensus_check(self, results: List[Dict]) -> Dict:
	valid = [r for r in results if "error" not in r]
	if not valid:
	return {"error": "All API requests failed"}
	# Choose the result with the longest content for robustness.
	return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))

	# ------------------------------
	# Research Workflow Engine
	# ------------------------------
	class ResearchWorkflow:
	def __init__(self):
	self.processor = CognitiveProcessor()
	self.workflow = StateGraph(AgentState)
	self._build_workflow()

	def _build_workflow(self):
	# Register nodes in the state graph
	self.workflow.add_node("ingest", self.ingest_query)
	self.workflow.add_node("retrieve", self.retrieve_documents)
	self.workflow.add_node("analyze", self.analyze_content)
	self.workflow.add_node("validate", self.validate_output)
	self.workflow.add_node("refine", self.refine_results)

	self.workflow.set_entry_point("ingest")
	self.workflow.add_edge("ingest", "retrieve")
	self.workflow.add_edge("retrieve", "analyze")
	self.workflow.add_conditional_edges(
	"analyze",
	self._quality_check,
	{"valid": "validate", "invalid": "refine"}
	)
	self.workflow.add_edge("validate", END)
	self.workflow.add_edge("refine", "retrieve")

	self.app = self.workflow.compile()

	def ingest_query(self, state: AgentState) -> Dict:
	try:
	query = state["messages"][-1].content
	st.write(f"[DEBUG] Ingesting query: {query}")
	return {
	"messages": [AIMessage(content="Query ingested successfully")],
	"context": {"raw_query": query},
	"metadata": {"timestamp": datetime.now().isoformat()}
	}
	except Exception as e:
	return self._error_state(f"Ingestion Error: {str(e)}")

	def retrieve_documents(self, state: AgentState) -> Dict:
	try:
	query = state["context"]["raw_query"]
	docs = retriever.retrieve(query, "research")
	st.write(f"[DEBUG] Retrieved {len(docs)} documents from retrieval node.")
	return {
	"messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
	"context": {
	"documents": docs,
	"retrieval_time": time.time()
	}
	}
	except Exception as e:
	return self._error_state(f"Retrieval Error: {str(e)}")

	def analyze_content(self, state: AgentState) -> Dict:
	try:
	# Ensure documents are present before proceeding.
	if "documents" not in state["context"] or not state["context"]["documents"]:
	return self._error_state("No documents retrieved; please check your query or retrieval process.")

	# Concatenate all document content for analysis.
	docs = "\n\n".join([d.page_content for d in state["context"]["documents"] if hasattr(d, "page_content")])
	st.write(f"[DEBUG] Analyzing content from {len(state['context']['documents'])} documents.")
	prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
	response = self.processor.process_query(prompt)

	if "error" in response:
	return self._error_state(response["error"])

	return {
	"messages": [AIMessage(content=response['choices'][0]['message']['content'])],
	"context": {"analysis": response}
	}
	except Exception as e:
	return self._error_state(f"Analysis Error: {str(e)}")

	def validate_output(self, state: AgentState) -> Dict:
	analysis = state["messages"][-1].content
	validation_prompt = f"""Validate research analysis:
	{analysis}

	Check for:
	1. Technical accuracy
	2. Citation support
	3. Logical consistency
	4. Methodological soundness

	Respond with 'VALID' or 'INVALID'"""

	response = self.processor.process_query(validation_prompt)
	return {
	"messages": [AIMessage(content=analysis + f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}")]
	}

	def refine_results(self, state: AgentState) -> Dict:
	refinement_prompt = f"""Refine this analysis:
	{state["messages"][-1].content}

	Improve:
	1. Technical precision
	2. Empirical grounding
	3. Theoretical coherence"""

	response = self.processor.process_query(refinement_prompt)
	return {
	"messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
	"context": state["context"]
	}

	def _quality_check(self, state: AgentState) -> str:
	content = state["messages"][-1].content
	return "valid" if "VALID" in content else "invalid"

	def _error_state(self, message: str) -> Dict:
	st.write(f"[ERROR] {message}")
	return {
	"messages": [AIMessage(content=f"❌ {message}")],
	"context": {"error": True},
	"metadata": {"status": "error"}
	}

	# ------------------------------
	# Research Interface
	# ------------------------------
	class ResearchInterface:
	def __init__(self):
	self.workflow = ResearchWorkflow()
	# Do not call st.set_page_config here because it has already been called at the top.
	self._inject_styles()
	self._build_sidebar()
	self._build_main_interface()

	def _inject_styles(self):
	st.markdown("""
	<style>
	:root {
	--primary: #2ecc71;
	--secondary: #3498db;
	--background: #0a0a0a;
	--text: #ecf0f1;
	}

	.stApp {
	background: var(--background);
	color: var(--text);
	font-family: 'Roboto', sans-serif;
	}

	.stTextArea textarea {
	background: #1a1a1a !important;
	color: var(--text) !important;
	border: 2px solid var(--secondary);
	border-radius: 8px;
	padding: 1rem;
	}

	.stButton>button {
	background: linear-gradient(135deg, var(--primary), var(--secondary));
	border: none;
	border-radius: 8px;
	padding: 1rem 2rem;
	transition: all 0.3s;
	}

	.stButton>button:hover {
	transform: translateY(-2px);
	box-shadow: 0 4px 12px rgba(46, 204, 113, 0.3);
	}

	.stExpander {
	background: #1a1a1a;
	border: 1px solid #2a2a2a;
	border-radius: 8px;
	margin: 1rem 0;
	}
	</style>
	""", unsafe_allow_html=True)

	def _build_sidebar(self):
	with st.sidebar:
	st.title("🔍 Research Database")
	st.subheader("Technical Papers")
	for title, short in ResearchConfig.DOCUMENT_MAP.items():
	with st.expander(short):
	st.markdown(f"```\n{title}\n```")

	st.subheader("Analysis Metrics")
	st.metric("Vector Collections", 2)
	st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)

	def _build_main_interface(self):
	st.title("🧠 NeuroResearch AI")
	query = st.text_area("Research Query:", height=200,
	placeholder="Enter technical research question...")

	if st.button("Execute Analysis", type="primary"):
	self._execute_analysis(query)

	def _execute_analysis(self, query: str):
	try:
	with st.spinner("Initializing Quantum Analysis..."):
	results = self.workflow.app.stream(
	{"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
	)
	for event in results:
	self._render_event(event)
	st.success("✅ Analysis Completed Successfully")
	except Exception as e:
	st.error(f"""Analysis Failed
	{str(e)}
	Potential issues:
	- Complex query structure
	- Document correlation failure
	- Temporal processing constraints""")

	def _render_event(self, event: Dict):
	if 'ingest' in event:
	with st.container():
	st.success("✅ Query Ingested")
	elif 'retrieve' in event:
	with st.container():
	docs = event['retrieve']['context']['documents']
	st.info(f"📚 Retrieved {len(docs)} documents")
	with st.expander("View Retrieved Documents", expanded=False):
	for i, doc in enumerate(docs, 1):
	st.markdown(f"Document {i}")
	st.code(doc.page_content, language='text')
	elif 'analyze' in event:
	with st.container():
	content = event['analyze']['messages'][0].content
	with st.expander("Technical Analysis Report", expanded=True):
	st.markdown(content)
	elif 'validate' in event:
	with st.container():
	content = event['validate']['messages'][0].content
	if "VALID" in content:
	st.success("✅ Validation Passed")
	with st.expander("View Validated Analysis", expanded=True):
	st.markdown(content.split("Validation:")[0])
	else:
	st.warning("⚠️ Validation Issues Detected")
	with st.expander("View Validation Details", expanded=True):
	st.markdown(content)

	if __name__ == "__main__":
	ResearchInterface()