# ------------------------------
# Enhanced NeuroResearch AI System with Refinement Counter and Increased Recursion Limit
# ------------------------------
import hashlib
import json
import logging
import os
import re
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from typing import Any, Dict, List, Optional, Sequence

import chromadb
import requests
import streamlit as st
# LangChain and LangGraph imports
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain_community.vectorstores import Chroma
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langchain_openai import OpenAIEmbeddings
from langgraph.graph import END, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode
from typing_extensions import Annotated, TypedDict

# Increase Python's recursion limit at the very start (if needed)
sys.setrecursionlimit(10000)

# ------------------------------
# Logging Configuration
# ------------------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)

# ------------------------------
# Shared helpers
# ------------------------------
# Word-bounded so that "INVALID" is never mistaken for "VALID".
_VALID_TOKEN = re.compile(r"\bVALID\b")
_INVALID_TOKEN = re.compile(r"\bINVALID\b")


def _extract_content(response: Dict[str, Any]) -> str:
    """
    Returns the text of the first choice in a chat-completion style response.

    Falls back to an empty string when ``choices`` is missing or empty, or
    when the first choice carries no message content, instead of raising.
    """
    choices = response.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or ""


def _is_valid_verdict(text: str) -> bool:
    """
    Returns True when ``text`` contains the standalone token ``VALID`` and
    does not contain the standalone token ``INVALID``.
    """
    return bool(_VALID_TOKEN.search(text)) and not _INVALID_TOKEN.search(text)


# ------------------------------
# State Schema Definition
# ------------------------------
class AgentState(TypedDict):
    # Conversation history; add_messages appends node output to the list.
    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
    # No reducer is declared here, so whatever a node returns for this key
    # replaces the previous value. Nodes must merge with the existing context
    # themselves if earlier keys are to survive.
    context: Dict[str, Any]
    metadata: Dict[str, Any]


# ------------------------------
# Configuration
# ------------------------------
class ResearchConfig:
    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
    CHROMA_PATH = "chroma_db"
    CHUNK_SIZE = 512
    CHUNK_OVERLAP = 64
    MAX_CONCURRENT_REQUESTS = 5
    EMBEDDING_DIMENSIONS = 1536
    # Number of refinement rounds after which the quality check stops looping.
    MAX_REFINEMENTS = 3
    DOCUMENT_MAP = {
        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
            "CV-Transformer Hybrid Architecture",
        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
            "Transformer Architecture Analysis",
        "Latest Trends in Machine Learning Methods Using Quantum Computing":
            "Quantum ML Frontiers"
    }
    ANALYSIS_TEMPLATE = (
        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
        "Respond with:\n"
        "1. Key Technical Contributions (bullet points)\n"
        "2. Novel Methodologies\n"
        "3. Empirical Results (with metrics)\n"
        "4. Potential Applications\n"
        "5. Limitations & Future Directions\n\n"
        "Format: Markdown with LaTeX mathematical notation where applicable"
    )


if not ResearchConfig.DEEPSEEK_API_KEY:
    st.error(
        """**Research Portal Configuration Required**
1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
3. Rebuild deployment"""
    )
    st.stop()


# ------------------------------
# Quantum Document Processing
# ------------------------------
class QuantumDocumentManager:
    """
    Manages creation of Chroma collections from raw document texts.
    """

    def __init__(self) -> None:
        try:
            self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
            logger.info("Initialized PersistentClient for Chroma.")
        except Exception as e:
            logger.error(f"Error initializing PersistentClient: {e}")
            self.client = chromadb.Client()  # Fallback to in-memory client
        self.embeddings = OpenAIEmbeddings(
            model="text-embedding-3-large",
            dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
        )

    def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
        """
        Splits documents into chunks and stores them as a Chroma collection.

        Chunks with identical text share one ID, so only the first occurrence
        is stored; this keeps the ID list free of duplicates within a batch.

        Raises:
            Exception: re-raised unchanged if splitting the documents fails.
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=ResearchConfig.CHUNK_SIZE,
            chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
            separators=["\n\n", "\n", "|||"]
        )
        try:
            docs = splitter.create_documents(documents)
            logger.info(f"Created {len(docs)} document chunks for collection '{collection_name}'.")
        except Exception as e:
            logger.error(f"Error splitting documents: {e}")
            raise

        # Dicts keep insertion order, so chunk order is preserved.
        unique_docs: Dict[str, Any] = {}
        for doc in docs:
            unique_docs.setdefault(self._document_id(doc.page_content), doc)

        return Chroma.from_documents(
            documents=list(unique_docs.values()),
            embedding=self.embeddings,
            client=self.client,
            collection_name=collection_name,
            ids=list(unique_docs)
        )

    def _document_id(self, content: str) -> str:
        """
        Generates a deterministic document ID from the SHA256 of the content.

        The ID intentionally contains no timestamp: the same chunk always maps
        to the same ID, so re-running this script against the persistent store
        does not keep adding copies of the same chunk under new IDs.
        """
        return hashlib.sha256(content.encode()).hexdigest()[:16]


# Initialize document collections
qdm = QuantumDocumentManager()
research_docs = qdm.create_collection([
    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
    "Latest Trends in Machine Learning Methods Using Quantum Computing"
], "research")
development_docs = qdm.create_collection([
    "Project A: UI Design Completed, API Integration in Progress",
    "Project B: Testing New Feature X, Bug Fixes Needed",
    "Product Y: In the Performance Optimization Stage Before Release"
], "development")


# ------------------------------
# Advanced Retrieval System
# ------------------------------
class ResearchRetriever:
    """
    Provides retrieval methods for different domains.
    """

    def __init__(self) -> None:
        try:
            self.research_retriever = research_docs.as_retriever(
                search_type="mmr",
                search_kwargs={'k': 4, 'fetch_k': 20, 'lambda_mult': 0.85}
            )
            self.development_retriever = development_docs.as_retriever(
                search_type="similarity",
                search_kwargs={'k': 3}
            )
            logger.info("Initialized retrievers for research and development domains.")
        except Exception as e:
            logger.error(f"Error initializing retrievers: {e}")
            raise

    def retrieve(self, query: str, domain: str) -> List[Any]:
        """
        Retrieves documents based on the query and domain.

        Returns an empty list for an unrecognized domain or when the
        underlying retriever raises.
        """
        try:
            if domain == "research":
                return self.research_retriever.invoke(query)
            elif domain == "development":
                return self.development_retriever.invoke(query)
            else:
                logger.warning(f"Domain '{domain}' not recognized.")
                return []
        except Exception as e:
            logger.error(f"Retrieval error for domain '{domain}': {e}")
            return []


retriever = ResearchRetriever()


# ------------------------------
# Cognitive Processing Unit
# ------------------------------
class CognitiveProcessor:
    """
    Executes API requests to the DeepSeek backend using triple redundancy
    and consolidates results via a consensus mechanism.
    """

    def __init__(self) -> None:
        self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
        self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]

    def process_query(self, prompt: str) -> Dict:
        """
        Processes a query by sending multiple API requests in parallel.

        Returns the response chosen by ``_consensus_check``, or a dict with
        an ``"error"`` key when no request produced a usable response.
        """
        # Triple redundancy for reliability
        futures = [
            self.executor.submit(self._execute_api_request, prompt)
            for _ in range(3)
        ]
        results = []
        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as e:
                logger.error(f"Error in API request: {e}")
                st.error(f"Processing Error: {str(e)}")
        return self._consensus_check(results)

    def _execute_api_request(self, prompt: str) -> Dict:
        """
        Executes a single API request to the DeepSeek endpoint.

        Returns the decoded JSON body on success, or ``{"error": <message>}``
        when the request fails with a ``requests`` exception.
        """
        headers = {
            "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
            "X-Research-Session": self.session_id
        }
        payload = {
            "model": "deepseek-chat",
            "messages": [{
                "role": "user",
                "content": f"Respond as Senior AI Researcher:\n{prompt}"
            }],
            "temperature": 0.7,
            "max_tokens": 1500,
            "top_p": 0.9
        }
        try:
            response = requests.post(
                "https://api.deepseek.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=45
            )
            response.raise_for_status()
            logger.info("DeepSeek API request successful.")
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"DeepSeek API request failed: {e}")
            return {"error": str(e)}

    def _consensus_check(self, results: List[Dict]) -> Dict:
        """
        Consolidates multiple API responses, selecting the one with the most content.

        Responses carrying an ``"error"`` key are ignored; if none remain,
        an error dict is returned.
        """
        valid_results = [r for r in results if "error" not in r]
        if not valid_results:
            logger.error("All API requests failed.")
            return {"error": "All API requests failed"}
        return max(valid_results, key=lambda r: len(_extract_content(r)))


# ------------------------------
# Research Workflow Engine
# ------------------------------
class ResearchWorkflow:
    """
    Defines the multi-step research workflow using a state graph.
    """

    def __init__(self) -> None:
        self.processor = CognitiveProcessor()
        self.workflow = StateGraph(AgentState)
        self._build_workflow()
        self.app = self.workflow.compile()

    def _build_workflow(self) -> None:
        # Define nodes
        self.workflow.add_node("ingest", self.ingest_query)
        self.workflow.add_node("retrieve", self.retrieve_documents)
        self.workflow.add_node("analyze", self.analyze_content)
        self.workflow.add_node("validate", self.validate_output)
        self.workflow.add_node("refine", self.refine_results)
        # Set entry point and edges
        self.workflow.set_entry_point("ingest")
        self.workflow.add_edge("ingest", "retrieve")
        self.workflow.add_edge("retrieve", "analyze")
        self.workflow.add_conditional_edges(
            "analyze",
            self._quality_check,
            # "error" ends the run: an error state has nothing to refine and
            # would otherwise cycle through refine -> retrieve -> analyze.
            {"valid": "validate", "invalid": "refine", "error": END}
        )
        self.workflow.add_edge("validate", END)
        self.workflow.add_edge("refine", "retrieve")

    def ingest_query(self, state: AgentState) -> Dict:
        """
        Ingests the research query and initializes the refinement counter.
        """
        try:
            query = state["messages"][-1].content
            # Initialize context with raw query and refinement counter
            new_context = {"raw_query": query, "refine_count": 0}
            logger.info("Query ingested.")
            return {
                "messages": [AIMessage(content="Query ingested successfully")],
                "context": new_context,
                "metadata": {"timestamp": datetime.now().isoformat()}
            }
        except Exception as e:
            return self._error_state(f"Ingestion Error: {str(e)}")

    def retrieve_documents(self, state: AgentState) -> Dict:
        """
        Retrieves research documents based on the query.

        The returned context is merged with the incoming one so that
        ``raw_query`` is still present when the refine loop comes back here.
        """
        try:
            query = state["context"]["raw_query"]
            docs = retriever.retrieve(query, "research")
            logger.info(f"Retrieved {len(docs)} documents for query.")
            return {
                "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
                "context": {
                    **state["context"],
                    "documents": docs,
                    "retrieval_time": time.time(),
                    "refine_count": state["context"].get("refine_count", 0)
                }
            }
        except Exception as e:
            return self._error_state(f"Retrieval Error: {str(e)}")

    def analyze_content(self, state: AgentState) -> Dict:
        """
        Analyzes the retrieved documents using the DeepSeek API.

        Skips the API call when an earlier node already produced an error
        state, and merges its result into the existing context.
        """
        try:
            if state["context"].get("error"):
                return self._error_state("Analysis skipped because an earlier step failed")
            docs = state["context"].get("documents", [])
            docs_text = "\n\n".join([d.page_content for d in docs])
            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
            response = self.processor.process_query(prompt)
            if "error" in response:
                return self._error_state(response["error"])
            logger.info("Content analysis completed.")
            return {
                "messages": [AIMessage(content=_extract_content(response))],
                "context": {
                    **state["context"],
                    "analysis": response,
                    "refine_count": state["context"].get("refine_count", 0)
                }
            }
        except Exception as e:
            return self._error_state(f"Analysis Error: {str(e)}")

    def validate_output(self, state: AgentState) -> Dict:
        """
        Validates the technical analysis report.

        Returns a message holding the analysis followed by a
        ``Validation:`` line with the validator's reply.
        """
        analysis = state["messages"][-1].content
        validation_prompt = (
            f"Validate research analysis:\n{analysis}\n\n"
            "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
            "Respond with 'VALID' or 'INVALID'"
        )
        response = self.processor.process_query(validation_prompt)
        logger.info("Output validation completed.")
        return {
            "messages": [AIMessage(content=analysis + f"\n\nValidation: {_extract_content(response)}")]
        }

    def refine_results(self, state: AgentState) -> Dict:
        """
        Refines the analysis report if validation fails.
        Increments the refinement counter to limit infinite loops.
        """
        refine_count = state["context"].get("refine_count", 0) + 1
        # Build a new dict rather than mutating the state handed to the node.
        new_context = {**state["context"], "refine_count": refine_count}
        logger.info(f"Refinement iteration: {refine_count}")
        refinement_prompt = (
            f"Refine this analysis:\n{state['messages'][-1].content}\n\n"
            "Improve:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence"
        )
        response = self.processor.process_query(refinement_prompt)
        logger.info("Refinement completed.")
        return {
            "messages": [AIMessage(content=_extract_content(response))],
            "context": new_context
        }

    def _quality_check(self, state: AgentState) -> str:
        """
        Checks whether the analysis report is valid.
        Forces a valid state if the refinement count exceeds a threshold.

        Returns "error" for an error state, otherwise "valid" or "invalid".
        """
        if state["context"].get("error"):
            logger.warning("Error state detected. Ending workflow.")
            return "error"
        refine_count = state["context"].get("refine_count", 0)
        if refine_count >= ResearchConfig.MAX_REFINEMENTS:
            logger.warning("Refinement limit reached. Forcing valid outcome to prevent infinite recursion.")
            return "valid"
        content = state["messages"][-1].content
        quality = "valid" if _is_valid_verdict(content) else "invalid"
        logger.info(f"Quality check returned: {quality}")
        return quality

    def _error_state(self, message: str) -> Dict:
        """
        Returns a standardized error state.
        """
        logger.error(message)
        return {
            "messages": [AIMessage(content=f"❌ {message}")],
            "context": {"error": True},
            "metadata": {"status": "error"}
        }


# ------------------------------
# Research Interface (Streamlit UI)
# ------------------------------
class ResearchInterface:
    """
    Provides the Streamlit-based interface for executing the research workflow.
    """

    def __init__(self) -> None:
        self.workflow = ResearchWorkflow()
        self._initialize_interface()

    def _initialize_interface(self) -> None:
        st.set_page_config(
            page_title="NeuroResearch AI",
            layout="wide",
            initial_sidebar_state="expanded"
        )
        self._inject_styles()
        self._build_sidebar()
        self._build_main_interface()

    def _inject_styles(self) -> None:
        # NOTE(review): the markup injected here is whitespace only; presumably
        # a style block was lost at some point - confirm against the original.
        st.markdown(
            """
            """,
            unsafe_allow_html=True
        )

    def _build_sidebar(self) -> None:
        with st.sidebar:
            st.title("🔍 Research Database")
            st.subheader("Technical Papers")
            for title, short in ResearchConfig.DOCUMENT_MAP.items():
                with st.expander(short):
                    st.markdown(f"```\n{title}\n```")
            st.subheader("Analysis Metrics")
            st.metric("Vector Collections", 2)
            st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)

    def _build_main_interface(self) -> None:
        st.title("🧠 NeuroResearch AI")
        query = st.text_area(
            "Research Query:",
            height=200,
            placeholder="Enter technical research question..."
        )
        if st.button("Execute Analysis", type="primary"):
            self._execute_analysis(query)

    def _execute_analysis(self, query: str) -> None:
        """
        Streams the workflow for ``query`` and renders each event as it arrives.

        Blank queries are rejected before the workflow is started.
        """
        if not query or not query.strip():
            st.warning("Please enter a research query before executing the analysis.")
            return
        try:
            failed = False
            with st.spinner("Initializing Quantum Analysis..."):
                # Pass a recursion limit configuration into the graph invocation
                results = self.workflow.app.stream({
                    "messages": [HumanMessage(content=query)],
                    "context": {},
                    "metadata": {}
                }, {"recursion_limit": 100})
                for event in results:
                    self._render_event(event)
                    if self._event_has_error(event):
                        failed = True
            if failed:
                st.error("❌ Analysis ended with errors. See the messages above.")
            else:
                st.success("✅ Analysis Completed Successfully")
        except Exception as e:
            logger.error(f"Workflow execution failed: {e}")
            st.error(
                f"""**Analysis Failed**

{str(e)}

Potential issues:
- Complex query structure
- Document correlation failure
- Temporal processing constraints"""
            )

    @staticmethod
    def _event_has_error(event: Dict) -> bool:
        """
        Returns True when any node update in ``event`` carries an error context.
        """
        return any(
            isinstance(update, dict) and bool((update.get("context") or {}).get("error"))
            for update in event.values()
        )

    def _render_event(self, event: Dict) -> None:
        """
        Renders one streamed workflow event, keyed by the node that produced it.
        """
        if 'ingest' in event:
            with st.container():
                st.success("✅ Query Ingested")
        elif 'retrieve' in event:
            with st.container():
                docs = event['retrieve']['context'].get('documents', [])
                st.info(f"📚 Retrieved {len(docs)} documents")
                with st.expander("View Retrieved Documents", expanded=False):
                    for idx, doc in enumerate(docs, start=1):
                        st.markdown(f"**Document {idx}**")
                        st.code(doc.page_content, language='text')
        elif 'analyze' in event:
            with st.container():
                content = event['analyze']['messages'][0].content
                with st.expander("Technical Analysis Report", expanded=True):
                    st.markdown(content)
        elif 'validate' in event:
            with st.container():
                content = event['validate']['messages'][0].content
                # The verdict is whatever follows the last "Validation:" marker;
                # only that part is inspected so that words inside the analysis
                # itself cannot decide the outcome.
                analysis_text, marker, verdict = content.rpartition("Validation:")
                if not marker:
                    analysis_text, verdict = content, ""
                if _is_valid_verdict(verdict):
                    st.success("✅ Validation Passed")
                    with st.expander("View Validated Analysis", expanded=True):
                        st.markdown(analysis_text)
                else:
                    st.warning("⚠️ Validation Issues Detected")
                    with st.expander("View Validation Details", expanded=True):
                        st.markdown(content)


if __name__ == "__main__":
    ResearchInterface()