Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Files Community

mgbam committed on Mar 13

Commit

99fb1d9

verified ·

1 Parent(s): e1707aa

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -37

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
-# ------------------------------
 # Imports & Initial Configuration
-# ------------------------------
 import streamlit as st
-# Set the page configuration immediately—this must be the first Streamlit command.
 st.set_page_config(page_title="NeuroResearch AI", layout="wide", initial_sidebar_state="expanded")
 from langchain_openai import OpenAIEmbeddings
@@ -15,27 +15,24 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
-import re
 import os
 import requests
 import hashlib
-import json
 import time
-from langchain.tools.retriever import create_retriever_tool
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
-# ------------------------------
 # State Schema Definition
-# ------------------------------
 class AgentState(TypedDict):
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
-# ------------------------------
 # Configuration
-# ------------------------------
 class ResearchConfig:
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
@@ -72,9 +69,9 @@ if not ResearchConfig.DEEPSEEK_API_KEY:
 3. Rebuild deployment""")
     st.stop()
-# ------------------------------
 # Quantum Document Processing
-# ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
         self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
@@ -90,8 +87,7 @@ class QuantumDocumentManager:
             separators=["\n\n", "\n", "|||"]
         )
         docs = splitter.create_documents(documents)
-        # Debug: log the number of chunks created for the collection.
-        st.write(f"Created {len(docs)} chunks for collection '{collection_name}'")
         return Chroma.from_documents(
             documents=docs,
             embedding=self.embeddings,
@@ -101,6 +97,7 @@ class QuantumDocumentManager:
         )
     def _document_id(self, content: str) -> str:
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
 # Initialize document collections
@@ -117,9 +114,9 @@ development_docs = qdm.create_collection([
     "Product Y: In the Performance Optimization Stage Before Release"
 ], "development")
-# ------------------------------
 # Advanced Retrieval System
-# ------------------------------
 class ResearchRetriever:
     def __init__(self):
         self.retrievers = {
@@ -138,9 +135,9 @@ class ResearchRetriever:
         }
     def retrieve(self, query: str, domain: str) -> List[Any]:
         try:
             results = self.retrievers[domain].invoke(query)
-            st.write(f"[DEBUG] Retrieved {len(results)} documents for query: '{query}' in domain '{domain}'")
             return results
         except KeyError:
             st.error(f"[ERROR] Retrieval domain '{domain}' not found.")
@@ -148,21 +145,19 @@ class ResearchRetriever:
 retriever = ResearchRetriever()
-# ------------------------------
 # Cognitive Processing Unit
-# ------------------------------
 class CognitiveProcessor:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
         self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
     def process_query(self, prompt: str) -> Dict:
         futures = []
-        for _ in range(3):  # Triple redundancy for robustness
-            futures.append(self.executor.submit(
-                self._execute_api_request,
-                prompt
-            ))
         results = []
         for future in as_completed(futures):
@@ -174,6 +169,7 @@ class CognitiveProcessor:
         return self._consensus_check(results)
     def _execute_api_request(self, prompt: str) -> Dict:
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
             "Content-Type": "application/json",
@@ -202,15 +198,15 @@ class CognitiveProcessor:
             return {"error": str(e)}
     def _consensus_check(self, results: List[Dict]) -> Dict:
         valid = [r for r in results if "error" not in r]
         if not valid:
             return {"error": "All API requests failed"}
-        # Choose the result with the longest content for robustness.
         return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
-# ------------------------------
 # Research Workflow Engine
-# ------------------------------
 class ResearchWorkflow:
     def __init__(self):
         self.processor = CognitiveProcessor()
@@ -225,6 +221,7 @@ class ResearchWorkflow:
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
@@ -239,9 +236,9 @@ class ResearchWorkflow:
         self.app = self.workflow.compile()
     def ingest_query(self, state: AgentState) -> Dict:
         try:
             query = state["messages"][-1].content
-            st.write(f"[DEBUG] Ingesting query: {query}")
             return {
                 "messages": [AIMessage(content="Query ingested successfully")],
                 "context": {"raw_query": query},
@@ -251,10 +248,10 @@ class ResearchWorkflow:
             return self._error_state(f"Ingestion Error: {str(e)}")
     def retrieve_documents(self, state: AgentState) -> Dict:
         try:
             query = state["context"]["raw_query"]
             docs = retriever.retrieve(query, "research")
-            st.write(f"[DEBUG] Retrieved {len(docs)} documents from retrieval node.")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
                 "context": {
@@ -266,14 +263,15 @@ class ResearchWorkflow:
             return self._error_state(f"Retrieval Error: {str(e)}")
     def analyze_content(self, state: AgentState) -> Dict:
         try:
-            # Ensure documents are present before proceeding.
             if "documents" not in state["context"] or not state["context"]["documents"]:
                 return self._error_state("No documents retrieved; please check your query or retrieval process.")
-            # Concatenate all document content for analysis.
-            docs = "\n\n".join([d.page_content for d in state["context"]["documents"] if hasattr(d, "page_content")])
-            st.write(f"[DEBUG] Analyzing content from {len(state['context']['documents'])} documents.")
             prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
             response = self.processor.process_query(prompt)
@@ -288,6 +286,7 @@ class ResearchWorkflow:
             return self._error_state(f"Analysis Error: {str(e)}")
     def validate_output(self, state: AgentState) -> Dict:
         analysis = state["messages"][-1].content
         validation_prompt = f"""Validate research analysis:
 {analysis}
@@ -306,6 +305,7 @@ Respond with 'VALID' or 'INVALID'"""
         }
     def refine_results(self, state: AgentState) -> Dict:
         refinement_prompt = f"""Refine this analysis:
 {state["messages"][-1].content}
@@ -321,29 +321,32 @@ Improve:
         }
     def _quality_check(self, state: AgentState) -> str:
         content = state["messages"][-1].content
         return "valid" if "VALID" in content else "invalid"
     def _error_state(self, message: str) -> Dict:
-        st.write(f"[ERROR] {message}")
         return {
             "messages": [AIMessage(content=f"❌ {message}")],
             "context": {"error": True},
             "metadata": {"status": "error"}
         }
-# ------------------------------
 # Research Interface
-# ------------------------------
 class ResearchInterface:
     def __init__(self):
         self.workflow = ResearchWorkflow()
-        # Do not call st.set_page_config here because it has already been called at the top.
         self._inject_styles()
         self._build_sidebar()
         self._build_main_interface()
     def _inject_styles(self):
         st.markdown("""
         <style>
         :root {
@@ -390,6 +393,7 @@ class ResearchInterface:
         """, unsafe_allow_html=True)
     def _build_sidebar(self):
         with st.sidebar:
             st.title("🔍 Research Database")
             st.subheader("Technical Papers")
@@ -402,6 +406,7 @@ class ResearchInterface:
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
     def _build_main_interface(self):
         st.title("🧠 NeuroResearch AI")
         query = st.text_area("Research Query:", height=200,
                              placeholder="Enter technical research question...")
@@ -410,6 +415,7 @@ class ResearchInterface:
             self._execute_analysis(query)
     def _execute_analysis(self, query: str):
         try:
             with st.spinner("Initializing Quantum Analysis..."):
                 results = self.workflow.app.stream(
@@ -427,6 +433,7 @@ Potential issues:
 - Temporal processing constraints""")
     def _render_event(self, event: Dict):
         if 'ingest' in event:
             with st.container():
                 st.success("✅ Query Ingested")
@@ -455,5 +462,8 @@ Potential issues:
                     with st.expander("View Validation Details", expanded=True):
                         st.markdown(content)
 if __name__ == "__main__":
     ResearchInterface()

+# ---------------------------------------------
 # Imports & Initial Configuration
+# ---------------------------------------------
 import streamlit as st
+# IMPORTANT: Must be the first Streamlit command
 st.set_page_config(page_title="NeuroResearch AI", layout="wide", initial_sidebar_state="expanded")
 from langchain_openai import OpenAIEmbeddings
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
 import os
 import requests
 import hashlib
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+# ---------------------------------------------
 # State Schema Definition
+# ---------------------------------------------
 class AgentState(TypedDict):
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
+# ---------------------------------------------
 # Configuration
+# ---------------------------------------------
 class ResearchConfig:
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
 3. Rebuild deployment""")
     st.stop()
+# ---------------------------------------------
 # Quantum Document Processing
+# ---------------------------------------------
 class QuantumDocumentManager:
     def __init__(self):
         self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
             separators=["\n\n", "\n", "|||"]
         )
         docs = splitter.create_documents(documents)
+        # Removed debug line that displayed chunk creation count
         return Chroma.from_documents(
             documents=docs,
             embedding=self.embeddings,
         )
     def _document_id(self, content: str) -> str:
+        """Create a unique ID for each document chunk."""
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
 # Initialize document collections
     "Product Y: In the Performance Optimization Stage Before Release"
 ], "development")
+# ---------------------------------------------
 # Advanced Retrieval System
+# ---------------------------------------------
 class ResearchRetriever:
     def __init__(self):
         self.retrievers = {
         }
     def retrieve(self, query: str, domain: str) -> List[Any]:
+        """Retrieve documents from the specified domain using the appropriate retriever."""
         try:
             results = self.retrievers[domain].invoke(query)
             return results
         except KeyError:
             st.error(f"[ERROR] Retrieval domain '{domain}' not found.")
 retriever = ResearchRetriever()
+# ---------------------------------------------
 # Cognitive Processing Unit
+# ---------------------------------------------
 class CognitiveProcessor:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
         self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
     def process_query(self, prompt: str) -> Dict:
+        """Send the prompt to the DeepSeek API using triple redundancy for robustness."""
         futures = []
+        for _ in range(3):
+            futures.append(self.executor.submit(self._execute_api_request, prompt))
         results = []
         for future in as_completed(futures):
         return self._consensus_check(results)
     def _execute_api_request(self, prompt: str) -> Dict:
+        """Make a single request to the DeepSeek API."""
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
             "Content-Type": "application/json",
             return {"error": str(e)}
     def _consensus_check(self, results: List[Dict]) -> Dict:
+        """Pick the best result by comparing content length among successful responses."""
         valid = [r for r in results if "error" not in r]
         if not valid:
             return {"error": "All API requests failed"}
         return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
+# ---------------------------------------------
 # Research Workflow Engine
+# ---------------------------------------------
 class ResearchWorkflow:
     def __init__(self):
         self.processor = CognitiveProcessor()
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
+        # Define workflow transitions
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
         self.app = self.workflow.compile()
     def ingest_query(self, state: AgentState) -> Dict:
+        """Extract the user query and store it in the state."""
         try:
             query = state["messages"][-1].content
             return {
                 "messages": [AIMessage(content="Query ingested successfully")],
                 "context": {"raw_query": query},
             return self._error_state(f"Ingestion Error: {str(e)}")
     def retrieve_documents(self, state: AgentState) -> Dict:
+        """Retrieve relevant documents from the 'research' domain."""
         try:
             query = state["context"]["raw_query"]
             docs = retriever.retrieve(query, "research")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
                 "context": {
             return self._error_state(f"Retrieval Error: {str(e)}")
     def analyze_content(self, state: AgentState) -> Dict:
+        """Concatenate document contents and analyze them using the CognitiveProcessor."""
         try:
             if "documents" not in state["context"] or not state["context"]["documents"]:
                 return self._error_state("No documents retrieved; please check your query or retrieval process.")
+            docs = "\n\n".join([
+                d.page_content for d in state["context"]["documents"]
+                if hasattr(d, "page_content") and d.page_content
+            ])
             prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
             response = self.processor.process_query(prompt)
             return self._error_state(f"Analysis Error: {str(e)}")
     def validate_output(self, state: AgentState) -> Dict:
+        """Validate the technical correctness of the analysis output."""
         analysis = state["messages"][-1].content
         validation_prompt = f"""Validate research analysis:
 {analysis}
         }
     def refine_results(self, state: AgentState) -> Dict:
+        """Refine the analysis based on the validation feedback."""
         refinement_prompt = f"""Refine this analysis:
 {state["messages"][-1].content}
         }
     def _quality_check(self, state: AgentState) -> str:
+        """Check if the validation step indicates a 'VALID' or 'INVALID' output."""
         content = state["messages"][-1].content
         return "valid" if "VALID" in content else "invalid"
     def _error_state(self, message: str) -> Dict:
+        """Return an error message and mark the state as erroneous."""
+        st.error(f"[ERROR] {message}")
         return {
             "messages": [AIMessage(content=f"❌ {message}")],
             "context": {"error": True},
             "metadata": {"status": "error"}
         }
+# ---------------------------------------------
 # Research Interface
+# ---------------------------------------------
 class ResearchInterface:
     def __init__(self):
         self.workflow = ResearchWorkflow()
+        # We've already set the page config at the top.
         self._inject_styles()
         self._build_sidebar()
         self._build_main_interface()
     def _inject_styles(self):
+        """Inject custom CSS for a sleek interface."""
         st.markdown("""
         <style>
         :root {
         """, unsafe_allow_html=True)
     def _build_sidebar(self):
+        """Construct the left sidebar with document info and metrics."""
         with st.sidebar:
             st.title("🔍 Research Database")
             st.subheader("Technical Papers")
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
     def _build_main_interface(self):
+        """Construct the main interface for query input and result display."""
         st.title("🧠 NeuroResearch AI")
         query = st.text_area("Research Query:", height=200,
                              placeholder="Enter technical research question...")
             self._execute_analysis(query)
     def _execute_analysis(self, query: str):
+        """Execute the entire research workflow and render the results."""
         try:
             with st.spinner("Initializing Quantum Analysis..."):
                 results = self.workflow.app.stream(
 - Temporal processing constraints""")
     def _render_event(self, event: Dict):
+        """Render each node's output in the UI as it streams through the workflow."""
         if 'ingest' in event:
             with st.container():
                 st.success("✅ Query Ingested")
                     with st.expander("View Validation Details", expanded=True):
                         st.markdown(content)
+# ---------------------------------------------
+# Main Execution
+# ---------------------------------------------
 if __name__ == "__main__":
     ResearchInterface()