Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Community

mgbam committed 26 days ago

Commit

2c0f60e

verified ·

1 Parent(s): 99fb1d9

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -84

app.py CHANGED Viewed

@@ -1,10 +1,6 @@
-# ---------------------------------------------
-# Imports & Initial Configuration
-# ---------------------------------------------
-import streamlit as st
-# IMPORTANT: Must be the first Streamlit command
-st.set_page_config(page_title="NeuroResearch AI", layout="wide", initial_sidebar_state="expanded")
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
@@ -15,24 +11,28 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
 import os
 import requests
 import hashlib
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
-# ---------------------------------------------
 # State Schema Definition
-# ---------------------------------------------
 class AgentState(TypedDict):
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
-# ---------------------------------------------
 # Configuration
-# ---------------------------------------------
 class ResearchConfig:
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
@@ -61,17 +61,17 @@ Respond with:
 Format: Markdown with LaTeX mathematical notation where applicable
 """
-# Validate API key configuration
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error("""**Research Portal Configuration Required**
-1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
-2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
-3. Rebuild deployment""")
     st.stop()
-# ---------------------------------------------
 # Quantum Document Processing
-# ---------------------------------------------
 class QuantumDocumentManager:
     def __init__(self):
         self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
@@ -87,7 +87,6 @@ class QuantumDocumentManager:
             separators=["\n\n", "\n", "|||"]
         )
         docs = splitter.create_documents(documents)
-        # Removed debug line that displayed chunk creation count
         return Chroma.from_documents(
             documents=docs,
             embedding=self.embeddings,
@@ -97,7 +96,6 @@ class QuantumDocumentManager:
         )
     def _document_id(self, content: str) -> str:
-        """Create a unique ID for each document chunk."""
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
 # Initialize document collections
@@ -114,9 +112,9 @@ development_docs = qdm.create_collection([
     "Product Y: In the Performance Optimization Stage Before Release"
 ], "development")
-# ---------------------------------------------
 # Advanced Retrieval System
-# ---------------------------------------------
 class ResearchRetriever:
     def __init__(self):
         self.retrievers = {
@@ -135,29 +133,28 @@ class ResearchRetriever:
         }
     def retrieve(self, query: str, domain: str) -> List[Any]:
-        """Retrieve documents from the specified domain using the appropriate retriever."""
         try:
-            results = self.retrievers[domain].invoke(query)
-            return results
         except KeyError:
-            st.error(f"[ERROR] Retrieval domain '{domain}' not found.")
             return []
 retriever = ResearchRetriever()
-# ---------------------------------------------
 # Cognitive Processing Unit
-# ---------------------------------------------
 class CognitiveProcessor:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
         self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
     def process_query(self, prompt: str) -> Dict:
-        """Send the prompt to the DeepSeek API using triple redundancy for robustness."""
         futures = []
-        for _ in range(3):
-            futures.append(self.executor.submit(self._execute_api_request, prompt))
         results = []
         for future in as_completed(futures):
@@ -169,7 +166,6 @@ class CognitiveProcessor:
         return self._consensus_check(results)
     def _execute_api_request(self, prompt: str) -> Dict:
-        """Make a single request to the DeepSeek API."""
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
             "Content-Type": "application/json",
@@ -198,15 +194,14 @@ class CognitiveProcessor:
             return {"error": str(e)}
     def _consensus_check(self, results: List[Dict]) -> Dict:
-        """Pick the best result by comparing content length among successful responses."""
         valid = [r for r in results if "error" not in r]
         if not valid:
             return {"error": "All API requests failed"}
         return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
-# ---------------------------------------------
 # Research Workflow Engine
-# ---------------------------------------------
 class ResearchWorkflow:
     def __init__(self):
         self.processor = CognitiveProcessor()
@@ -214,14 +209,12 @@ class ResearchWorkflow:
         self._build_workflow()
     def _build_workflow(self):
-        # Register nodes in the state graph
         self.workflow.add_node("ingest", self.ingest_query)
         self.workflow.add_node("retrieve", self.retrieve_documents)
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
-        # Define workflow transitions
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
@@ -236,7 +229,6 @@ class ResearchWorkflow:
         self.app = self.workflow.compile()
     def ingest_query(self, state: AgentState) -> Dict:
-        """Extract the user query and store it in the state."""
         try:
             query = state["messages"][-1].content
             return {
@@ -248,7 +240,6 @@ class ResearchWorkflow:
             return self._error_state(f"Ingestion Error: {str(e)}")
     def retrieve_documents(self, state: AgentState) -> Dict:
-        """Retrieve relevant documents from the 'research' domain."""
         try:
             query = state["context"]["raw_query"]
             docs = retriever.retrieve(query, "research")
@@ -263,15 +254,8 @@ class ResearchWorkflow:
             return self._error_state(f"Retrieval Error: {str(e)}")
     def analyze_content(self, state: AgentState) -> Dict:
-        """Concatenate document contents and analyze them using the CognitiveProcessor."""
         try:
-            if "documents" not in state["context"] or not state["context"]["documents"]:
-                return self._error_state("No documents retrieved; please check your query or retrieval process.")
-            docs = "\n\n".join([
-                d.page_content for d in state["context"]["documents"]
-                if hasattr(d, "page_content") and d.page_content
-            ])
             prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
             response = self.processor.process_query(prompt)
@@ -286,18 +270,17 @@ class ResearchWorkflow:
             return self._error_state(f"Analysis Error: {str(e)}")
     def validate_output(self, state: AgentState) -> Dict:
-        """Validate the technical correctness of the analysis output."""
         analysis = state["messages"][-1].content
         validation_prompt = f"""Validate research analysis:
-{analysis}
-Check for:
-1. Technical accuracy
-2. Citation support
-3. Logical consistency
-4. Methodological soundness
-Respond with 'VALID' or 'INVALID'"""
         response = self.processor.process_query(validation_prompt)
         return {
@@ -305,14 +288,13 @@ Respond with 'VALID' or 'INVALID'"""
         }
     def refine_results(self, state: AgentState) -> Dict:
-        """Refine the analysis based on the validation feedback."""
         refinement_prompt = f"""Refine this analysis:
-{state["messages"][-1].content}
-Improve:
-1. Technical precision
-2. Empirical grounding
-3. Theoretical coherence"""
         response = self.processor.process_query(refinement_prompt)
         return {
@@ -321,32 +303,35 @@ Improve:
         }
     def _quality_check(self, state: AgentState) -> str:
-        """Check if the validation step indicates a 'VALID' or 'INVALID' output."""
         content = state["messages"][-1].content
         return "valid" if "VALID" in content else "invalid"
     def _error_state(self, message: str) -> Dict:
-        """Return an error message and mark the state as erroneous."""
-        st.error(f"[ERROR] {message}")
         return {
             "messages": [AIMessage(content=f"❌ {message}")],
             "context": {"error": True},
             "metadata": {"status": "error"}
         }
-# ---------------------------------------------
 # Research Interface
-# ---------------------------------------------
 class ResearchInterface:
     def __init__(self):
         self.workflow = ResearchWorkflow()
-        # We've already set the page config at the top.
         self._inject_styles()
         self._build_sidebar()
         self._build_main_interface()
     def _inject_styles(self):
-        """Inject custom CSS for a sleek interface."""
         st.markdown("""
         <style>
         :root {
@@ -393,7 +378,6 @@ class ResearchInterface:
         """, unsafe_allow_html=True)
     def _build_sidebar(self):
-        """Construct the left sidebar with document info and metrics."""
         with st.sidebar:
             st.title("🔍 Research Database")
             st.subheader("Technical Papers")
@@ -406,37 +390,37 @@ class ResearchInterface:
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
     def _build_main_interface(self):
-        """Construct the main interface for query input and result display."""
         st.title("🧠 NeuroResearch AI")
         query = st.text_area("Research Query:", height=200,
-                             placeholder="Enter technical research question...")
         if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
     def _execute_analysis(self, query: str):
-        """Execute the entire research workflow and render the results."""
         try:
             with st.spinner("Initializing Quantum Analysis..."):
                 results = self.workflow.app.stream(
                     {"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
                 )
                 for event in results:
                     self._render_event(event)
                 st.success("✅ Analysis Completed Successfully")
         except Exception as e:
             st.error(f"""**Analysis Failed**
-{str(e)}
-Potential issues:
-- Complex query structure
-- Document correlation failure
-- Temporal processing constraints""")
     def _render_event(self, event: Dict):
-        """Render each node's output in the UI as it streams through the workflow."""
         if 'ingest' in event:
             with st.container():
                 st.success("✅ Query Ingested")
         elif 'retrieve' in event:
             with st.container():
                 docs = event['retrieve']['context']['documents']
@@ -445,11 +429,13 @@ Potential issues:
                     for i, doc in enumerate(docs, 1):
                         st.markdown(f"**Document {i}**")
                         st.code(doc.page_content, language='text')
         elif 'analyze' in event:
             with st.container():
                 content = event['analyze']['messages'][0].content
                 with st.expander("Technical Analysis Report", expanded=True):
                     st.markdown(content)
         elif 'validate' in event:
             with st.container():
                 content = event['validate']['messages'][0].content
@@ -462,8 +448,5 @@ Potential issues:
                     with st.expander("View Validation Details", expanded=True):
                         st.markdown(content)
-# ---------------------------------------------
-# Main Execution
-# ---------------------------------------------
 if __name__ == "__main__":
-    ResearchInterface()

+# ------------------------------
+# Imports & Dependencies
+# ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
+import re
 import os
+import streamlit as st
 import requests
 import hashlib
+import json
 import time
+from langchain.tools.retriever import create_retriever_tool
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+# ------------------------------
 # State Schema Definition
+# ------------------------------
 class AgentState(TypedDict):
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
+# ------------------------------
 # Configuration
+# ------------------------------
 class ResearchConfig:
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
 Format: Markdown with LaTeX mathematical notation where applicable
 """
+# Validation
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error("""**Research Portal Configuration Required**
+    1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
+    2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
+    3. Rebuild deployment""")
     st.stop()
+# ------------------------------
 # Quantum Document Processing
+# ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
         self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
             separators=["\n\n", "\n", "|||"]
         )
         docs = splitter.create_documents(documents)
         return Chroma.from_documents(
             documents=docs,
             embedding=self.embeddings,
         )
     def _document_id(self, content: str) -> str:
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
 # Initialize document collections
     "Product Y: In the Performance Optimization Stage Before Release"
 ], "development")
+# ------------------------------
 # Advanced Retrieval System
+# ------------------------------
 class ResearchRetriever:
     def __init__(self):
         self.retrievers = {
         }
     def retrieve(self, query: str, domain: str) -> List[Any]:
         try:
+            return self.retrievers[domain].invoke(query)
         except KeyError:
             return []
 retriever = ResearchRetriever()
+# ------------------------------
 # Cognitive Processing Unit
+# ------------------------------
 class CognitiveProcessor:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
         self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
     def process_query(self, prompt: str) -> Dict:
         futures = []
+        for _ in range(3):  # Triple redundancy
+            futures.append(self.executor.submit(
+                self._execute_api_request,
+                prompt
+            ))
         results = []
         for future in as_completed(futures):
         return self._consensus_check(results)
     def _execute_api_request(self, prompt: str) -> Dict:
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
             "Content-Type": "application/json",
             return {"error": str(e)}
     def _consensus_check(self, results: List[Dict]) -> Dict:
         valid = [r for r in results if "error" not in r]
         if not valid:
             return {"error": "All API requests failed"}
         return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
+# ------------------------------
 # Research Workflow Engine
+# ------------------------------
 class ResearchWorkflow:
     def __init__(self):
         self.processor = CognitiveProcessor()
         self._build_workflow()
     def _build_workflow(self):
         self.workflow.add_node("ingest", self.ingest_query)
         self.workflow.add_node("retrieve", self.retrieve_documents)
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
         self.app = self.workflow.compile()
     def ingest_query(self, state: AgentState) -> Dict:
         try:
             query = state["messages"][-1].content
             return {
             return self._error_state(f"Ingestion Error: {str(e)}")
     def retrieve_documents(self, state: AgentState) -> Dict:
         try:
             query = state["context"]["raw_query"]
             docs = retriever.retrieve(query, "research")
             return self._error_state(f"Retrieval Error: {str(e)}")
     def analyze_content(self, state: AgentState) -> Dict:
         try:
+            docs = "\n\n".join([d.page_content for d in state["context"]["documents"]])
             prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
             response = self.processor.process_query(prompt)
             return self._error_state(f"Analysis Error: {str(e)}")
     def validate_output(self, state: AgentState) -> Dict:
         analysis = state["messages"][-1].content
         validation_prompt = f"""Validate research analysis:
+        {analysis}
+        Check for:
+        1. Technical accuracy
+        2. Citation support
+        3. Logical consistency
+        4. Methodological soundness
+        Respond with 'VALID' or 'INVALID'"""
         response = self.processor.process_query(validation_prompt)
         return {
         }
     def refine_results(self, state: AgentState) -> Dict:
         refinement_prompt = f"""Refine this analysis:
+        {state["messages"][-1].content}
+        Improve:
+        1. Technical precision
+        2. Empirical grounding
+        3. Theoretical coherence"""
         response = self.processor.process_query(refinement_prompt)
         return {
         }
     def _quality_check(self, state: AgentState) -> str:
         content = state["messages"][-1].content
         return "valid" if "VALID" in content else "invalid"
     def _error_state(self, message: str) -> Dict:
         return {
             "messages": [AIMessage(content=f"❌ {message}")],
             "context": {"error": True},
             "metadata": {"status": "error"}
         }
+# ------------------------------
 # Research Interface
+# ------------------------------
 class ResearchInterface:
     def __init__(self):
         self.workflow = ResearchWorkflow()
+        self._initialize_interface()
+    def _initialize_interface(self):
+        st.set_page_config(
+            page_title="NeuroResearch AI",
+            layout="wide",
+            initial_sidebar_state="expanded"
+        )
         self._inject_styles()
         self._build_sidebar()
         self._build_main_interface()
     def _inject_styles(self):
         st.markdown("""
         <style>
         :root {
         """, unsafe_allow_html=True)
     def _build_sidebar(self):
         with st.sidebar:
             st.title("🔍 Research Database")
             st.subheader("Technical Papers")
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
     def _build_main_interface(self):
         st.title("🧠 NeuroResearch AI")
         query = st.text_area("Research Query:", height=200,
+                           placeholder="Enter technical research question...")
         if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
     def _execute_analysis(self, query: str):
         try:
             with st.spinner("Initializing Quantum Analysis..."):
                 results = self.workflow.app.stream(
                     {"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
                 )
                 for event in results:
                     self._render_event(event)
                 st.success("✅ Analysis Completed Successfully")
         except Exception as e:
             st.error(f"""**Analysis Failed**
+            {str(e)}
+            Potential issues:
+            - Complex query structure
+            - Document correlation failure
+            - Temporal processing constraints""")
     def _render_event(self, event: Dict):
         if 'ingest' in event:
             with st.container():
                 st.success("✅ Query Ingested")
         elif 'retrieve' in event:
             with st.container():
                 docs = event['retrieve']['context']['documents']
                     for i, doc in enumerate(docs, 1):
                         st.markdown(f"**Document {i}**")
                         st.code(doc.page_content, language='text')
         elif 'analyze' in event:
             with st.container():
                 content = event['analyze']['messages'][0].content
                 with st.expander("Technical Analysis Report", expanded=True):
                     st.markdown(content)
         elif 'validate' in event:
             with st.container():
                 content = event['validate']['messages'][0].content
                     with st.expander("View Validation Details", expanded=True):
                         st.markdown(content)
 if __name__ == "__main__":
+    ResearchInterface()