mgbam committed on
Commit 7370048 · verified · Parent: de3ef7d

Update app.py

Files changed (1):
  1. app.py +474 -269
app.py CHANGED
@@ -1,334 +1,539 @@
 # ------------------------------
-# NeuroResearch 2.1: Robust Research System
 # ------------------------------
-from langchain_openai import OpenAIEmbeddings
-from langchain_community.vectorstores import Chroma
-from langchain_community.retrievers import BM25Retriever
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from rank_bm25 import BM25Okapi
-from sentence_transformers import CrossEncoder
-from typing_extensions import TypedDict, Annotated
-from typing import (
-    Sequence, Dict, List, Optional, Any, Tuple, Union
-)
-
-import chromadb
 import os
 import hashlib
 import json
 import time
-
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime

 import streamlit as st
-import plotly.express as px
-import pandas as pd

 # ------------------------------
 # Configuration
 # ------------------------------
-class NeuroConfig:
-    """
-    Configuration class for NeuroResearch system.
-
-    Attributes:
-        DEEPSEEK_API_KEY (str): Optional API key for external services.
-        CHROMA_PATH (str): File path for Chroma's persistent storage.
-        CHUNK_SIZE (int): Maximum length of text chunks for splitting.
-        CHUNK_OVERLAP (int): Overlap between text chunks to preserve context.
-        MAX_CONCURRENT_REQUESTS (int): Number of concurrent threads for processing.
-        EMBEDDING_DIMENSIONS (int): Dimensionality of embeddings.
-        HYBRID_RERANK_TOP_K (int): Number of documents to retrieve and rerank.
-        ANALYSIS_MODES (dict): Possible analysis modes and their descriptions.
-        CACHE_TTL (int): Time-to-live (seconds) for cached items.
-    """
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
-    CHROMA_PATH = "neuro_db"
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
-    MAX_CONCURRENT_REQUESTS = 7
-    EMBEDDING_DIMENSIONS = 3072
-    HYBRID_RERANK_TOP_K = 15
-    ANALYSIS_MODES = {
-        "technical": "Deep Technical Analysis",
-        "comparative": "Cross-Paper Comparison",
-        "temporal": "Temporal Trend Analysis",
-        "critical": "Critical Literature Review"
     }
-    CACHE_TTL = 3600  # 1 hour

 # ------------------------------
-# Document Processor
 # ------------------------------
-class NeuralDocumentProcessor:
     """
-    A document processing and retrieval utility class.
-
-    Responsibilities:
-    - Splitting documents into manageable chunks.
-    - Storing and retrieving embeddings with Chroma.
-    - Performing hybrid retrieval (vector + BM25) with cross-encoder reranking.
-    - Handling concurrency during document ingestion (optional).
     """
     def __init__(self) -> None:
-        """
-        Initialize the NeuralDocumentProcessor with a persistent Chroma client,
-        OpenAI-based embeddings, a CrossEncoder for reranking, and a text splitter.
-        """
-        # Persistent Chroma client
         try:
-            self.client = chromadb.PersistentClient(path=NeuroConfig.CHROMA_PATH)
         except Exception as e:
-            # Fallback to in-memory client if persistent fails
-            print(f"Error initializing Chroma PersistentClient: {e}")
-            self.client = chromadb.Client()
-
-        # Embeddings (OpenAI-based)
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
-            dimensions=NeuroConfig.EMBEDDING_DIMENSIONS
         )

-        # Cross-encoder for reranking
-        self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
-
-        # Text splitter
-        self.text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=NeuroConfig.CHUNK_SIZE,
-            chunk_overlap=NeuroConfig.CHUNK_OVERLAP,
-            separators=["\n\n", "\n", "(?<=\\. )", " "],
         )

-    def process_documents(
-        self,
-        documents: List[str],
-        collection: str,
-        use_concurrency: bool = False
-    ) -> Optional[Chroma]:
         """
-        Process a list of document strings by splitting, embedding, and storing them in Chroma.
-        Optionally uses concurrency for splitting documents.
-
-        Args:
-            documents (List[str]): The list of raw document texts.
-            collection (str): The Chroma collection name to store these documents in.
-            use_concurrency (bool, optional): If True, process documents concurrently. Defaults to False.
-
-        Returns:
-            Optional[Chroma]: The Chroma vectorstore for the specified collection, or None if no docs.
         """
-        if not documents:
-            print("No documents provided to process_documents.")
-            return None
-
-        # Split documents into chunks
-        if use_concurrency and len(documents) > 1:
-            chunks = []
-            with ThreadPoolExecutor(max_workers=NeuroConfig.MAX_CONCURRENT_REQUESTS) as executor:
-                future_to_doc = {
-                    executor.submit(self.text_splitter.create_documents, [doc]): doc
-                    for doc in documents
-                }
-                for future in as_completed(future_to_doc):
-                    try:
-                        result = future.result()
-                        chunks.extend(result)
-                    except Exception as e:
-                        print(f"Error splitting document: {e}")
-        else:
-            # Single-threaded splitting
-            chunks = []
-            for doc in documents:
-                chunks.extend(self.text_splitter.create_documents([doc]))
-
-        # Build unique IDs for each chunk
-        chunk_ids = [self._quantum_id(doc.page_content) for doc in chunks]
-
-        # Create Chroma from documents
         try:
-            vectorstore = Chroma.from_documents(
-                documents=chunks,
-                embedding=self.embeddings,
-                client=self.client,
-                collection_name=collection,
-                ids=chunk_ids
             )
-            return vectorstore
         except Exception as e:
-            print(f"Error creating Chroma collection: {e}")
-            return None
-
-    def hybrid_retrieval(
-        self,
-        query: str,
-        collection: str,
-        return_scores: bool = False
-    ) -> Union[List[str], List[Tuple[str, float]]]:
         """
-        Perform hybrid retrieval combining vector-based search with BM25,
-        then re-rank the combined results using a cross-encoder.
-
-        Args:
-            query (str): The user query for retrieving documents.
-            collection (str): The name of the Chroma collection to search.
-            return_scores (bool): If True, return a list of (document, score) tuples.
-                Otherwise, return a list of document strings only.
-
-        Returns:
-            Union[List[str], List[Tuple[str, float]]]: The top-k reranked results,
-            either as strings or (string, score) pairs.
         """
-        # Try to load the existing collection
         try:
-            vector_store = Chroma(
-                client=self.client,
-                collection_name=collection,
-                embedding_function=self.embeddings
-            )
         except Exception as e:
-            print(f"Error loading Chroma collection '{collection}': {e}")
-            return [] if not return_scores else []

-        # Check if the collection is empty
-        stored_docs = vector_store.get()
-        if not stored_docs or "documents" not in stored_docs or not stored_docs["documents"]:
-            print(f"No documents found in collection '{collection}'.")
-            return [] if not return_scores else []

-        all_docs = [doc.page_content for doc in stored_docs["documents"]]
-        if not all_docs:
-            print(f"No documents found in collection '{collection}'.")
-            return [] if not return_scores else []

-        # Vector-based retrieval
         try:
-            vector_retriever = vector_store.as_retriever(
-                search_kwargs={"k": NeuroConfig.HYBRID_RERANK_TOP_K}
             )
-            vector_results = [doc.page_content for doc in vector_retriever.invoke(query)]
-        except Exception as e:
-            print(f"Error during vector retrieval: {e}")
-            vector_results = []
-
-        # BM25 retrieval
-        tokenized_docs = [doc.split() for doc in all_docs]
-        bm25 = BM25Okapi(tokenized_docs)
-        bm25_results = bm25.get_top_n(
-            query.split(),
-            all_docs,
-            n=NeuroConfig.HYBRID_RERANK_TOP_K
         )

-        # Combine results and remove duplicates
-        combined = list(set(vector_results + bm25_results))

-        if not combined:
-            print("No documents retrieved by either BM25 or vector search.")
-            return [] if not return_scores else []

-        # Cross-encoder reranking
-        scores = self.cross_encoder.predict([(query, doc) for doc in combined])
-        reranked = sorted(zip(combined, scores), key=lambda x: x[1], reverse=True)
-        top_results = reranked[:NeuroConfig.HYBRID_RERANK_TOP_K]

-        # Return based on user preference
-        if return_scores:
-            return top_results  # List[Tuple[str, float]]
-        else:
-            return [doc for doc, _ in top_results]

-    def _quantum_id(self, content: str) -> str:
         """
-        Create a unique ID for each text chunk by hashing its content.

-        Args:
-            content (str): The text content of the chunk.

-        Returns:
-            str: A unique hash-based identifier.
         """
-        return f"neuro_{hashlib.sha3_256(content.encode()).hexdigest()[:24]}"

 # ------------------------------
-# NeuroInterface (Streamlit Example)
 # ------------------------------
-def NeuroInterface() -> None:
     """
-    A basic Streamlit-based interface to demonstrate usage of the NeuralDocumentProcessor.
-    This function can be adapted for Hugging Face Spaces or other frontends.
     """
-    st.title("NeuroResearch 2.1: Robust Research System")
-
-    # Initialize Document Processor
-    processor = NeuralDocumentProcessor()
-
-    # Sidebar for uploading and processing documents
-    with st.sidebar:
-        st.header("Document Ingestion")
-        uploaded_files = st.file_uploader(
-            "Upload one or more text files",
-            type=["txt", "md", "pdf"],
-            accept_multiple_files=True
         )
-        collection_name = st.text_input("Collection Name", value="default_collection")
-
-        use_concurrency = st.checkbox("Use Concurrency for Processing?", value=False)
-
-        if st.button("Process Documents"):
-            if uploaded_files and collection_name.strip():
-                # Read files
-                docs_content = []
-                for uf in uploaded_files:
-                    content = uf.read()
-                    # Assume UTF-8; adapt as needed
-                    try:
-                        docs_content.append(content.decode("utf-8"))
-                    except UnicodeDecodeError:
-                        st.error(f"Could not decode {uf.name}. Make sure it's UTF-8 text.")
-                st.write("Processing documents...")
-                vectorstore = processor.process_documents(
-                    documents=docs_content,
-                    collection=collection_name,
-                    use_concurrency=use_concurrency
-                )
-                if vectorstore:
-                    st.success(f"Documents processed and stored in collection: {collection_name}")
-                else:
-                    st.error("Processing failed or returned no vectorstore.")
-
-    # Main interface for querying
-    st.subheader("Query Documents")
-    user_query = st.text_input("Enter your query:")
-    return_scores = st.checkbox("Return Scores?")
-
-    if st.button("Search"):
-        if not user_query.strip() or not collection_name.strip():
-            st.warning("Please provide both a query and a valid collection name.")
-        else:
-            st.write(f"Retrieving from collection: {collection_name}")
-            results = processor.hybrid_retrieval(
-                query=user_query,
-                collection=collection_name,
-                return_scores=return_scores
             )
-            if results:
-                st.write("Top Reranked Results:")
-                if return_scores:
-                    # Each result is (doc, score)
-                    for idx, (doc, score) in enumerate(results, start=1):
-                        st.markdown(f"**Result {idx} | Score: {score:.4f}**")
-                        st.write(doc[:500] + ("..." if len(doc) > 500 else ""))
                 else:
-                    # Just doc texts
-                    for idx, doc in enumerate(results, start=1):
-                        st.markdown(f"**Result {idx}**")
-                        st.write(doc[:500] + ("..." if len(doc) > 500 else ""))
-            else:
-                st.warning("No results found or collection may be empty.")

-# ------------------------------
-# Main Entry Point
-# ------------------------------
 if __name__ == "__main__":
-    NeuroInterface()
 
 # ------------------------------
+# Enhanced NeuroResearch AI System
 # ------------------------------
+import logging
 import os
+import re
 import hashlib
 import json
 import time
 from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import List, Dict, Any, Optional, Sequence

+import chromadb  # required by the Chroma clients in QuantumDocumentManager below
+import requests
 import streamlit as st
+
+# LangChain and LangGraph imports
+from langchain_openai import OpenAIEmbeddings
+from langchain_community.vectorstores import Chroma
+from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langgraph.graph import END, StateGraph
+from langgraph.prebuilt import ToolNode
+from langgraph.graph.message import add_messages
+from typing_extensions import TypedDict, Annotated
+from langchain.tools.retriever import create_retriever_tool
+
+# ------------------------------
+# Logging Configuration
+# ------------------------------
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+# ------------------------------
+# State Schema Definition
+# ------------------------------
+class AgentState(TypedDict):
+    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
+    context: Dict[str, Any]
+    metadata: Dict[str, Any]
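+    # Note: the add_messages reducer appends each node's returned "messages"
+    # to the running state instead of replacing them; "context" and "metadata"
+    # have no reducer, so any node that returns them overwrites prior values.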

 # ------------------------------
 # Configuration
 # ------------------------------
+class ResearchConfig:
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
+    CHROMA_PATH = "chroma_db"
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
+    MAX_CONCURRENT_REQUESTS = 5
+    EMBEDDING_DIMENSIONS = 1536
+    DOCUMENT_MAP = {
+        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
+            "CV-Transformer Hybrid Architecture",
+        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
+            "Transformer Architecture Analysis",
+        "Latest Trends in Machine Learning Methods Using Quantum Computing":
+            "Quantum ML Frontiers"
     }
+    ANALYSIS_TEMPLATE = (
+        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
+        "Respond with:\n"
+        "1. Key Technical Contributions (bullet points)\n"
+        "2. Novel Methodologies\n"
+        "3. Empirical Results (with metrics)\n"
+        "4. Potential Applications\n"
+        "5. Limitations & Future Directions\n\n"
+        "Format: Markdown with LaTeX mathematical notation where applicable"
+    )
+
+if not ResearchConfig.DEEPSEEK_API_KEY:
+    st.error(
+        """**Research Portal Configuration Required**
+        1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
+        2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
+        3. Rebuild deployment"""
+    )
+    st.stop()
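+# st.stop() ends this Streamlit script run immediately, so the ingestion and
+# UI code below never executes until the DEEPSEEK_API_KEY secret is configured.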

 # ------------------------------
+# Quantum Document Processing
 # ------------------------------
+class QuantumDocumentManager:
     """
+    Manages the creation of Chroma collections from raw document texts.
     """
     def __init__(self) -> None:
         try:
+            self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
+            logger.info("Initialized PersistentClient for Chroma.")
         except Exception as e:
+            logger.error(f"Error initializing PersistentClient: {e}")
+            self.client = chromadb.Client()  # Fallback to in-memory client
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
+            dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )

+    def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
+        """
+        Splits documents into chunks and stores them as a Chroma collection.
+        """
+        splitter = RecursiveCharacterTextSplitter(
+            chunk_size=ResearchConfig.CHUNK_SIZE,
+            chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
+            separators=["\n\n", "\n", "|||"]
+        )
+        try:
+            docs = splitter.create_documents(documents)
+            logger.info(f"Created {len(docs)} document chunks for collection '{collection_name}'.")
+        except Exception as e:
+            logger.error(f"Error splitting documents: {e}")
+            raise e
+
+        return Chroma.from_documents(
+            documents=docs,
+            embedding=self.embeddings,
+            client=self.client,
+            collection_name=collection_name,
+            ids=[self._document_id(doc.page_content) for doc in docs]
         )

+    def _document_id(self, content: str) -> str:
         """
+        Generates a unique document ID using SHA256 and the current timestamp.
         """
+        return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
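+        # Note: each ID couples a content hash with an ingestion timestamp, so
+        # re-running the app stores identical text again under new IDs rather
+        # than deduplicating against chunks already in the collection.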
+
+# Initialize document collections
+qdm = QuantumDocumentManager()
+research_docs = qdm.create_collection([
+    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
+    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
+    "Latest Trends in Machine Learning Methods Using Quantum Computing"
+], "research")
+
+development_docs = qdm.create_collection([
+    "Project A: UI Design Completed, API Integration in Progress",
+    "Project B: Testing New Feature X, Bug Fixes Needed",
+    "Product Y: In the Performance Optimization Stage Before Release"
+], "development")
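+# Note: this ingestion runs at module import time, so Streamlit re-executes it
+# on every script rerun; caching it (e.g. with st.cache_resource) would avoid
+# re-embedding the same documents on each interaction.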
+
+# ------------------------------
+# Advanced Retrieval System
+# ------------------------------
+class ResearchRetriever:
+    """
+    Provides retrieval methods for different domains.
+    """
+    def __init__(self) -> None:
         try:
+            self.research_retriever = research_docs.as_retriever(
+                search_type="mmr",
+                search_kwargs={'k': 4, 'fetch_k': 20, 'lambda_mult': 0.85}
+            )
+            self.development_retriever = development_docs.as_retriever(
+                search_type="similarity",
+                search_kwargs={'k': 3}
             )
+            logger.info("Initialized retrievers for research and development domains.")
         except Exception as e:
+            logger.error(f"Error initializing retrievers: {e}")
+            raise e
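+        # MMR above fetches fetch_k=20 candidates and returns k=4;
+        # lambda_mult=0.85 favors query relevance over result diversity
+        # (1.0 = pure similarity ranking, 0.0 = maximum diversity).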
+
+    def retrieve(self, query: str, domain: str) -> List[Any]:
         """
+        Retrieves documents based on the query and domain.
         """
         try:
+            if domain == "research":
+                return self.research_retriever.invoke(query)
+            elif domain == "development":
+                return self.development_retriever.invoke(query)
+            else:
+                logger.warning(f"Domain '{domain}' not recognized.")
+                return []
         except Exception as e:
+            logger.error(f"Retrieval error for domain '{domain}': {e}")
+            return []

+retriever = ResearchRetriever()

+# ------------------------------
+# Cognitive Processing Unit
+# ------------------------------
+class CognitiveProcessor:
+    """
+    Executes API requests to the DeepSeek backend using triple redundancy
+    and consolidates results via a consensus mechanism.
+    """
+    def __init__(self) -> None:
+        self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
+        self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]

+    def process_query(self, prompt: str) -> Dict:
+        """
+        Process a query by sending multiple API requests in parallel.
+        """
+        futures = []
+        for _ in range(3):  # Triple redundancy for reliability
+            futures.append(self.executor.submit(self._execute_api_request, prompt))
+
+        results = []
+        for future in as_completed(futures):
+            try:
+                results.append(future.result())
+            except Exception as e:
+                logger.error(f"Error in API request: {e}")
+                st.error(f"Processing Error: {str(e)}")
+
+        return self._consensus_check(results)
+
+    def _execute_api_request(self, prompt: str) -> Dict:
+        """
+        Executes a single API request to the DeepSeek endpoint.
+        """
+        headers = {
+            "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
+            "Content-Type": "application/json",
+            "X-Research-Session": self.session_id
+        }
+        payload = {
+            "model": "deepseek-chat",
+            "messages": [{
+                "role": "user",
+                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+            }],
+            "temperature": 0.7,
+            "max_tokens": 1500,
+            "top_p": 0.9
+        }
         try:
+            response = requests.post(
+                "https://api.deepseek.com/v1/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=45
             )
+            response.raise_for_status()
+            logger.info("DeepSeek API request successful.")
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            logger.error(f"DeepSeek API request failed: {e}")
+            return {"error": str(e)}
+
+    def _consensus_check(self, results: List[Dict]) -> Dict:
+        """
+        Consolidates multiple API responses, selecting the one with the most content.
+        """
+        valid_results = [r for r in results if "error" not in r]
+        if not valid_results:
+            logger.error("All API requests failed.")
+            return {"error": "All API requests failed"}
+        # Choose the response with the longest content
+        return max(valid_results, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
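+        # Note: this "consensus" is a longest-answer heuristic rather than a
+        # vote: with temperature 0.7 the three replies differ, and the most
+        # detailed one wins.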
+
+# ------------------------------
+# Research Workflow Engine
+# ------------------------------
+class ResearchWorkflow:
+    """
+    Defines the multi-step research workflow using a state graph.
+    """
+    def __init__(self) -> None:
+        self.processor = CognitiveProcessor()
+        self.workflow = StateGraph(AgentState)
+        self._build_workflow()
+        self.app = self.workflow.compile()
+
+    def _build_workflow(self) -> None:
+        # Define nodes
+        self.workflow.add_node("ingest", self.ingest_query)
+        self.workflow.add_node("retrieve", self.retrieve_documents)
+        self.workflow.add_node("analyze", self.analyze_content)
+        self.workflow.add_node("validate", self.validate_output)
+        self.workflow.add_node("refine", self.refine_results)
+        # Set entry point and edges
+        self.workflow.set_entry_point("ingest")
+        self.workflow.add_edge("ingest", "retrieve")
+        self.workflow.add_edge("retrieve", "analyze")
+        self.workflow.add_conditional_edges(
+            "analyze",
+            self._quality_check,
+            {"valid": "validate", "invalid": "refine"}
         )
+        self.workflow.add_edge("validate", END)
+        self.workflow.add_edge("refine", "retrieve")
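+        # Resulting topology:
+        #   ingest -> retrieve -> analyze --valid--> validate -> END
+        #                ^                 --invalid--> refine
+        #                '--------------------------------'
+        # Note: refine -> retrieve can loop repeatedly if validation keeps
+        # failing; only LangGraph's recursion limit bounds the retries.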

+    def ingest_query(self, state: AgentState) -> Dict:
+        """
+        Ingests the research query.
+        """
+        try:
+            query = state["messages"][-1].content
+            logger.info("Query ingested.")
+            return {
+                "messages": [AIMessage(content="Query ingested successfully")],
+                "context": {"raw_query": query},
+                "metadata": {"timestamp": datetime.now().isoformat()}
+            }
+        except Exception as e:
+            return self._error_state(f"Ingestion Error: {str(e)}")

+    def retrieve_documents(self, state: AgentState) -> Dict:
+        """
+        Retrieves research documents based on the query.
+        """
+        try:
+            query = state["context"]["raw_query"]
+            docs = retriever.retrieve(query, "research")
+            logger.info(f"Retrieved {len(docs)} documents for query.")
+            return {
+                "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
+                "context": {"documents": docs, "retrieval_time": time.time()}
+            }
+        except Exception as e:
+            return self._error_state(f"Retrieval Error: {str(e)}")

+    def analyze_content(self, state: AgentState) -> Dict:
+        """
+        Analyzes the retrieved documents using the DeepSeek API.
+        """
+        try:
+            docs = state["context"].get("documents", [])
+            docs_text = "\n\n".join([d.page_content for d in docs])
+            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
+            response = self.processor.process_query(prompt)
+            if "error" in response:
+                return self._error_state(response["error"])
+            logger.info("Content analysis completed.")
+            return {
+                "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
+                "context": {"analysis": response}
+            }
+        except Exception as e:
+            return self._error_state(f"Analysis Error: {str(e)}")
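+        # Note: "context" has no reducer in AgentState, so each node's return
+        # replaces the whole dict; retrieve_documents drops "raw_query", which
+        # breaks the refine -> retrieve loop unless the query is re-supplied.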

+    def validate_output(self, state: AgentState) -> Dict:
+        """
+        Validates the technical analysis report.
+        """
+        analysis = state["messages"][-1].content
+        validation_prompt = (
+            f"Validate research analysis:\n{analysis}\n\n"
+            "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
+            "Respond with 'VALID' or 'INVALID'"
+        )
+        response = self.processor.process_query(validation_prompt)
+        logger.info("Output validation completed.")
+        return {
+            "messages": [
+                AIMessage(
+                    content=analysis +
+                    f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}"
+                )
+            ]
+        }

+    def refine_results(self, state: AgentState) -> Dict:
+        """
+        Refines the analysis report if validation fails.
         """
+        refinement_prompt = (
+            f"Refine this analysis:\n{state['messages'][-1].content}\n\n"
+            "Improve:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence"
+        )
+        response = self.processor.process_query(refinement_prompt)
+        logger.info("Refinement completed.")
+        return {
+            "messages": [
+                AIMessage(
+                    content=response.get('choices', [{}])[0].get('message', {}).get('content', '')
+                )
+            ],
+            "context": state["context"]
+        }

+    def _quality_check(self, state: AgentState) -> str:
+        """
+        Checks whether the analysis report is valid.
+        """
+        content = state["messages"][-1].content
+        # Test the negative verdict first: "VALID" is a substring of "INVALID".
+        quality = "valid" if "VALID" in content and "INVALID" not in content else "invalid"
+        logger.info(f"Quality check returned: {quality}")
+        return quality

+    def _error_state(self, message: str) -> Dict:
+        """
+        Returns a standardized error state.
         """
+        logger.error(message)
+        return {
+            "messages": [AIMessage(content=f"❌ {message}")],
+            "context": {"error": True},
+            "metadata": {"status": "error"}
+        }
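+    # Note: validate_output appends "Validation: ..." to the report text; the
+    # Streamlit renderer below splits on that exact marker to separate the
+    # analysis from the verdict.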

 # ------------------------------
+# Research Interface (Streamlit UI)
 # ------------------------------
+class ResearchInterface:
     """
+    Provides the Streamlit-based interface for executing the research workflow.
     """
+    def __init__(self) -> None:
+        self.workflow = ResearchWorkflow()
+        self._initialize_interface()
+
+    def _initialize_interface(self) -> None:
+        st.set_page_config(
+            page_title="NeuroResearch AI",
+            layout="wide",
+            initial_sidebar_state="expanded"
         )
+        self._inject_styles()
+        self._build_sidebar()
+        self._build_main_interface()
+
+    def _inject_styles(self) -> None:
+        st.markdown(
+            """
+            <style>
+            :root {
+                --primary: #2ecc71;
+                --secondary: #3498db;
+                --background: #0a0a0a;
+                --text: #ecf0f1;
+            }
+            .stApp {
+                background: var(--background);
+                color: var(--text);
+                font-family: 'Roboto', sans-serif;
+            }
+            .stTextArea textarea {
+                background: #1a1a1a !important;
+                color: var(--text) !important;
+                border: 2px solid var(--secondary);
+                border-radius: 8px;
+                padding: 1rem;
+            }
+            .stButton>button {
+                background: linear-gradient(135deg, var(--primary), var(--secondary));
+                border: none;
+                border-radius: 8px;
+                padding: 1rem 2rem;
+                transition: all 0.3s;
+            }
+            .stButton>button:hover {
+                transform: translateY(-2px);
+                box-shadow: 0 4px 12px rgba(46, 204, 113, 0.3);
+            }
+            .stExpander {
+                background: #1a1a1a;
+                border: 1px solid #2a2a2a;
+                border-radius: 8px;
+                margin: 1rem 0;
+            }
+            </style>
+            """,
+            unsafe_allow_html=True
+        )
+
+    def _build_sidebar(self) -> None:
+        with st.sidebar:
+            st.title("🔍 Research Database")
+            st.subheader("Technical Papers")
+            for title, short in ResearchConfig.DOCUMENT_MAP.items():
+                with st.expander(short):
+                    st.markdown(f"```\n{title}\n```")
+            st.subheader("Analysis Metrics")
+            st.metric("Vector Collections", 2)
+            st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
+
+    def _build_main_interface(self) -> None:
+        st.title("🧠 NeuroResearch AI")
+        query = st.text_area(
+            "Research Query:",
+            height=200,
+            placeholder="Enter technical research question..."
+        )
+        if st.button("Execute Analysis", type="primary"):
+            self._execute_analysis(query)
+
+    def _execute_analysis(self, query: str) -> None:
+        try:
+            with st.spinner("Initializing Quantum Analysis..."):
+                results = self.workflow.app.stream({
+                    "messages": [HumanMessage(content=query)],
+                    "context": {},
+                    "metadata": {}
+                })
+                for event in results:
+                    self._render_event(event)
+            st.success("✅ Analysis Completed Successfully")
+        except Exception as e:
+            logger.error(f"Workflow execution failed: {e}")
+            st.error(
+                f"""**Analysis Failed**
+                {str(e)}
+                Potential issues:
+                - Complex query structure
+                - Document correlation failure
+                - Temporal processing constraints"""
             )
+
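+    # Note: app.stream() yields one event dict per executed node, keyed by the
+    # node name ("ingest", "retrieve", ...); _render_event dispatches on those
+    # keys.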
+    def _render_event(self, event: Dict) -> None:
+        if 'ingest' in event:
+            with st.container():
+                st.success("✅ Query Ingested")
+        elif 'retrieve' in event:
+            with st.container():
+                docs = event['retrieve']['context'].get('documents', [])
+                st.info(f"📚 Retrieved {len(docs)} documents")
+                with st.expander("View Retrieved Documents", expanded=False):
+                    for idx, doc in enumerate(docs, start=1):
+                        st.markdown(f"**Document {idx}**")
+                        st.code(doc.page_content, language='text')
+        elif 'analyze' in event:
+            with st.container():
+                content = event['analyze']['messages'][0].content
+                with st.expander("Technical Analysis Report", expanded=True):
+                    st.markdown(content)
+        elif 'validate' in event:
+            with st.container():
+                content = event['validate']['messages'][0].content
+                # Same substring caveat as _quality_check: exclude "INVALID".
+                if "VALID" in content and "INVALID" not in content:
+                    st.success("✅ Validation Passed")
+                    with st.expander("View Validated Analysis", expanded=True):
+                        st.markdown(content.split("Validation:")[0])
                 else:
+                    st.warning("⚠️ Validation Issues Detected")
+                    with st.expander("View Validation Details", expanded=True):
+                        st.markdown(content)

 if __name__ == "__main__":
+    ResearchInterface()