Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Community

mgbam committed on Mar 13

Commit

fc628b4

verified ·

1 Parent(s): c436283

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -134

app.py CHANGED Viewed

@@ -9,8 +9,8 @@ from langchain_core.documents import Document
 from langgraph.graph import END, StateGraph
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
-from langgraph.graph.message import add_messages  # Add this import
 import chromadb
 import numpy as np
 import os
 import streamlit as st
@@ -41,30 +41,24 @@ class ResearchConfig:
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
     RESEARCH_EMBEDDING = np.random.randn(1536)
     DOCUMENT_MAP = {
         "CV-Transformer Hybrid Architecture": {
-            "title": "Research Report: CV-Transformer Model (98% Accuracy)",
             "content": """
-            Hybrid architecture combining CNNs and Transformers achieves 98% image recognition accuracy.
-            Key equation: $f(x) = \text{Attention}(\text{CNN}(x))$
-            Validation on ImageNet-1k: Top-1 Accuracy 98.2%, Inference Speed 42ms/img
             """
         },
         "Transformer Architecture Analysis": {
-            "title": "Academic Paper: Transformers in NLP",
             "content": """
-            Self-attention mechanism remains core innovation:
             $\text{Attention}(Q, K, V) = \text{softmax}(\frac{QK^T}{\sqrt{d_k}})V$
-            GLUE Benchmark Score: 92.4%, Training Efficiency: 1.8x vs RNNs
-            """
-        },
-        "Quantum ML Frontiers": {
-            "title": "Quantum Machine Learning Review",
-            "content": """
-            Quantum gradient descent enables faster optimization:
-            $\theta_{t+1} = \theta_t - \eta \nabla_\theta \mathcal{L}(\theta_t)$
-            100x speedup on optimization tasks, 58% energy reduction
             """
         }
     }
@@ -73,11 +67,11 @@ class ResearchConfig:
 {context}
 Respond in MARKDOWN with:
-1. **Key Technical Contributions** (bullet points with equations)
-2. **Novel Methodologies** (algorithms with math notation)
 3. **Empirical Results** (comparative metrics)
-4. **Applications** (domain-specific implementations)
-5. **Limitations** (theoretical/practical boundaries)
 Include LaTeX equations where applicable."""
@@ -89,16 +83,36 @@ if not ResearchConfig.DEEPSEEK_API_KEY:
     st.stop()
 # ------------------------------
-# Document Processing System
 # ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
-        self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )
     def create_collection(self, document_map: Dict[str, Dict[str, str]], collection_name: str) -> Chroma:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
@@ -123,6 +137,10 @@ class QuantumDocumentManager:
             documents=docs,
             embedding=self.embeddings,
             collection_name=collection_name,
             ids=[self._document_id(doc.page_content) for doc in docs]
         )
@@ -131,10 +149,10 @@ class QuantumDocumentManager:
 # Initialize document system
 qdm = QuantumDocumentManager()
-research_docs = qdm.create_collection(ResearchConfig.DOCUMENT_MAP, "research")
 # ------------------------------
-# Intelligent Retrieval System
 # ------------------------------
 class ResearchRetriever:
     def __init__(self):
@@ -150,7 +168,7 @@ class ResearchRetriever:
     def retrieve(self, query: str) -> List[Document]:
         try:
             docs = self.retriever.invoke(query)
-            if not docs:
                 raise ValueError("No relevant documents found")
             return docs
         except Exception as e:
@@ -158,7 +176,7 @@ class ResearchRetriever:
             return []
 # ------------------------------
-# Robust Processing Core
 # ------------------------------
 class CognitiveProcessor:
     def __init__(self):
@@ -206,7 +224,7 @@ class CognitiveProcessor:
         return valid[np.argmax(tech_scores)]
 # ------------------------------
-# Validation Workflow Engine
 # ------------------------------
 class ResearchWorkflow:
     def __init__(self):
@@ -216,11 +234,11 @@ class ResearchWorkflow:
         self._build_workflow()
     def _build_workflow(self):
-        self.workflow.add_node("ingest", self.ingest_query)
-        self.workflow.add_node("retrieve", self.retrieve_documents)
-        self.workflow.add_node("analyze", self.analyze_content)
-        self.workflow.add_node("validate", self.validate_output)
-        self.workflow.add_node("refine", self.refine_results)
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
@@ -235,100 +253,93 @@ class ResearchWorkflow:
         self.app = self.workflow.compile()
-    def ingest_query(self, state: AgentState) -> Dict:
         try:
             query = state["messages"][-1].content
             return {
-                "messages": [AIMessage(content="Query ingested successfully")],
-                "context": {"raw_query": query},
                 "metadata": {"timestamp": datetime.now().isoformat()}
             }
         except Exception as e:
             return self._error_state(f"Ingestion Error: {str(e)}")
-    def retrieve_documents(self, state: AgentState) -> Dict:
         try:
-            docs = self.retriever.retrieve(state["context"]["raw_query"])
-            if not docs:
-                return self._error_state("Document correlation failure - no relevant papers found")
             return {
-                "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
-                "context": {"documents": docs}
             }
         except Exception as e:
             return self._error_state(f"Retrieval Error: {str(e)}")
-    def analyze_content(self, state: AgentState) -> Dict:
         try:
-            docs = state["context"]["documents"]
-            context = "\n\n".join([f"### {doc.metadata['title']}\n{doc.page_content}" for doc in docs])
             prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=context)
             response = self.processor.process_query(prompt)
             if "error" in response:
                 raise RuntimeError(response["error"])
-            analysis = response['choices'][0]['message']['content']
-            self._validate_analysis_structure(analysis)
-            return {
-                "messages": [AIMessage(content=analysis)],
-                "context": {"analysis": analysis}
-            }
         except Exception as e:
             return self._error_state(f"Analysis Error: {str(e)}")
-    def validate_output(self, state: AgentState) -> Dict:
         validation_prompt = f"""Validate this technical analysis:
 {state["messages"][-1].content}
 Check for:
 1. Mathematical accuracy
-2. Empirical evidence
-3. Technical depth
-4. Logical consistency
 Respond with 'VALID' or 'INVALID'"""
         response = self.processor.process_query(validation_prompt)
-        content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
         return {
-            "messages": [AIMessage(content=f"{state['messages'][-1].content}\n\n## Validation\n{content}")],
-            "context": {"valid": "VALID" in content}
         }
-    def refine_results(self, state: AgentState) -> Dict:
         refinement_prompt = f"""Improve this analysis:
 {state["messages"][-1].content}
 Focus on:
-1. Enhancing mathematical rigor
-2. Adding empirical references
-3. Strengthening technical arguments"""
         response = self.processor.process_query(refinement_prompt)
-        return {
-            "messages": [AIMessage(content=response['choices'][0]['message']['content'])],
-            "context": state["context"]
-        }
     def _quality_check(self, state: AgentState) -> str:
         return "valid" if state.get("context", {}).get("valid", False) else "invalid"
-    def _validate_analysis_structure(self, content: str):
         required_sections = [
-            "Key Technical Contributions",
-            "Novel Methodologies",
             "Empirical Results",
             "Applications",
             "Limitations"
         ]
         missing = [s for s in required_sections if f"## {s}" not in content]
         if missing:
-            raise ValueError(f"Missing critical sections: {', '.join(missing)}")
         if not re.search(r"\$.*?\$", content):
-            raise ValueError("Analysis lacks required mathematical notation")
     def _error_state(self, message: str) -> Dict:
         return {
@@ -338,22 +349,22 @@ Focus on:
         }
 # ------------------------------
-# Research Interface
 # ------------------------------
 class ResearchInterface:
     def __init__(self):
         self.workflow = ResearchWorkflow()
-        self._initialize_interface()
-    def _initialize_interface(self):
         st.set_page_config(
-            page_title="NeuroResearch AI",
             layout="wide",
             initial_sidebar_state="expanded"
         )
         self._inject_styles()
         self._build_sidebar()
-        self._build_main_interface()
     def _inject_styles(self):
         st.markdown("""
@@ -362,103 +373,71 @@ class ResearchInterface:
             --primary: #2ecc71;
             --secondary: #3498db;
             --background: #0a0a0a;
-            --text: #ecf0f1;
         }
         .stApp {
             background: var(--background);
-            color: var(--text);
-            font-family: 'Roboto', sans-serif;
         }
         .stTextArea textarea {
             background: #1a1a1a !important;
-            color: var(--text) !important;
-            border: 2px solid var(--secondary);
-            border-radius: 8px;
-            padding: 1rem;
-        }
-        .stButton>button {
-            background: linear-gradient(135deg, var(--primary), var(--secondary));
-            border: none;
-            border-radius: 8px;
-            padding: 1rem 2rem;
-            transition: all 0.3s;
         }
-        .stButton>button:hover {
-            transform: translateY(-2px);
-            box-shadow: 0 4px 12px rgba(46, 204, 113, 0.3);
-        }
-        .stExpander {
-            background: #1a1a1a;
-            border: 1px solid #2a2a2a;
-            border-radius: 8px;
-            margin: 1rem 0;
-        }
         code {
-            color: #2ecc71;
             background: #002200;
             padding: 2px 4px;
-            border-radius: 4px;
         }
         </style>
         """, unsafe_allow_html=True)
     def _build_sidebar(self):
         with st.sidebar:
-            st.title("🔍 Research Database")
             for key, data in ResearchConfig.DOCUMENT_MAP.items():
                 with st.expander(data["title"]):
-                    st.markdown(f"```\n{data['content']}\n```")
-            st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
-            st.metric("Document Chunks", len(research_docs.get()['ids']))
-    def _build_main_interface(self):
-        st.title("🧠 NeuroResearch AI")
-        query = st.text_area("Research Query:", height=200,
-                           placeholder="Enter technical research question...")
-        if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
     def _execute_analysis(self, query: str):
         try:
-            with st.spinner("Performing deep technical analysis..."):
                 result = self.workflow.app.invoke(
                     {"messages": [HumanMessage(content=query)]}
                 )
                 if result.get("context", {}).get("error"):
-                    self._show_error(result["context"].get("error", "Unknown error"))
                 else:
-                    self._display_results(result)
         except Exception as e:
             self._show_error(str(e))
-    def _display_results(self, result):
-        content = result["messages"][-1].content
-        with st.expander("Technical Analysis Report", expanded=True):
-            st.markdown(content)
-        with st.expander("Source Documents", expanded=False):
-            for doc in result["context"].get("documents", []):
                 st.markdown(f"**{doc.metadata['title']}**")
                 st.code(doc.page_content, language='latex')
     def _show_error(self, message):
         st.error(f"""
-        ⚠️ Analysis Failed: {message}
-        Troubleshooting Steps:
-        1. Check query specificity
-        2. Verify document connections
-        3. Ensure mathematical notation in sources
-        4. Review API key validity
-        5. Simplify complex query structures
         """)
 if __name__ == "__main__":

 from langgraph.graph import END, StateGraph
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
+from chromadb.config import Settings
 import numpy as np
 import os
 import streamlit as st
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
     RESEARCH_EMBEDDING = np.random.randn(1536)
+    TENANT = "research_tenant"
+    DATABASE = "ai_papers_db"
     DOCUMENT_MAP = {
         "CV-Transformer Hybrid Architecture": {
+            "title": "Hybrid CV-Transformer Model (98% Accuracy)",
             "content": """
+            Combines CNN feature extraction with transformer attention mechanisms.
+            Key equation: $f(x) = \text{Softmax}(\frac{QK^T}{\sqrt{d_k}})V$
+            ImageNet-1k: 98.2% Top-1 Accuracy, 42ms/inference
             """
         },
         "Transformer Architecture Analysis": {
+            "title": "Transformer Architectures in NLP",
             "content": """
+            Self-attention mechanisms enable parallel processing of sequences.
             $\text{Attention}(Q, K, V) = \text{softmax}(\frac{QK^T}{\sqrt{d_k}})V$
+            GLUE Score: 92.4%, Training Efficiency: 1.8x vs RNNs
             """
         }
     }
 {context}
 Respond in MARKDOWN with:
+1. **Key Innovations** (mathematical formulations)
+2. **Methodologies** (algorithms & architectures)
 3. **Empirical Results** (comparative metrics)
+4. **Applications** (industry use cases)
+5. **Limitations** (theoretical boundaries)
 Include LaTeX equations where applicable."""
     st.stop()
 # ------------------------------
+# ChromaDB Document Manager (Fixed)
 # ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
+        self.client_settings = Settings(
+            chroma_db_impl="duckdb+parquet",
+            persist_directory=ResearchConfig.CHROMA_PATH,
+            anonymized_telemetry=False
+        )
+        self.client = chromadb.Client(self.client_settings)
+        self._initialize_tenant_db()
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )
+    def _initialize_tenant_db(self):
+        try:
+            self.client.create_tenant(ResearchConfig.TENANT)
+        except chromadb.db.base.UniqueConstraintError:
+            pass  # Tenant exists
+        try:
+            self.client.create_database(
+                ResearchConfig.DATABASE,
+                tenant=ResearchConfig.TENANT
+            )
+        except chromadb.db.base.UniqueConstraintError:
+            pass  # Database exists
     def create_collection(self, document_map: Dict[str, Dict[str, str]], collection_name: str) -> Chroma:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
             documents=docs,
             embedding=self.embeddings,
             collection_name=collection_name,
+            client=self.client,
+            tenant=ResearchConfig.TENANT,
+            database=ResearchConfig.DATABASE,
+            collection_metadata={"hnsw:space": "cosine"},
             ids=[self._document_id(doc.page_content) for doc in docs]
         )
 # Initialize document system
 qdm = QuantumDocumentManager()
+research_docs = qdm.create_collection(ResearchConfig.DOCUMENT_MAP, "research_papers")
 # ------------------------------
+# Retrieval System
 # ------------------------------
 class ResearchRetriever:
     def __init__(self):
     def retrieve(self, query: str) -> List[Document]:
         try:
             docs = self.retriever.invoke(query)
+            if len(docs) < 1:
                 raise ValueError("No relevant documents found")
             return docs
         except Exception as e:
             return []
 # ------------------------------
+# Analysis Processor
 # ------------------------------
 class CognitiveProcessor:
     def __init__(self):
         return valid[np.argmax(tech_scores)]
 # ------------------------------
+# Workflow Engine
 # ------------------------------
 class ResearchWorkflow:
     def __init__(self):
         self._build_workflow()
     def _build_workflow(self):
+        self.workflow.add_node("ingest", self.ingest)
+        self.workflow.add_node("retrieve", self.retrieve)
+        self.workflow.add_node("analyze", self.analyze)
+        self.workflow.add_node("validate", self.validate)
+        self.workflow.add_node("refine", self.refine)
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.app = self.workflow.compile()
+    def ingest(self, state: AgentState) -> Dict:
         try:
             query = state["messages"][-1].content
             return {
+                "messages": [AIMessage(content="Query ingested")],
+                "context": {"query": query},
                 "metadata": {"timestamp": datetime.now().isoformat()}
             }
         except Exception as e:
             return self._error_state(f"Ingestion Error: {str(e)}")
+    def retrieve(self, state: AgentState) -> Dict:
         try:
+            docs = self.retriever.retrieve(state["context"]["query"])
             return {
+                "messages": [AIMessage(content=f"Found {len(docs)} relevant papers")],
+                "context": {"docs": docs}
             }
         except Exception as e:
             return self._error_state(f"Retrieval Error: {str(e)}")
+    def analyze(self, state: AgentState) -> Dict:
         try:
+            context = "\n\n".join([
+                f"### {doc.metadata['title']}\n{doc.page_content}"
+                for doc in state["context"]["docs"]
+            ])
             prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=context)
             response = self.processor.process_query(prompt)
             if "error" in response:
                 raise RuntimeError(response["error"])
+            content = response['choices'][0]['message']['content']
+            self._validate_analysis(content)
+            return {"messages": [AIMessage(content=content)]}
         except Exception as e:
             return self._error_state(f"Analysis Error: {str(e)}")
+    def validate(self, state: AgentState) -> Dict:
         validation_prompt = f"""Validate this technical analysis:
 {state["messages"][-1].content}
 Check for:
 1. Mathematical accuracy
+2. Technical depth
+3. Logical consistency
 Respond with 'VALID' or 'INVALID'"""
         response = self.processor.process_query(validation_prompt)
+        valid = "VALID" in response.get('choices', [{}])[0].get('message', {}).get('content', '')
         return {
+            "messages": [AIMessage(content=f"{state['messages'][-1].content}\n\nValidation: {'✅ Valid' if valid else '❌ Invalid'}")],
+            "context": {"valid": valid}
         }
+    def refine(self, state: AgentState) -> Dict:
         refinement_prompt = f"""Improve this analysis:
 {state["messages"][-1].content}
 Focus on:
+1. Mathematical precision
+2. Technical terminology
+3. Empirical references"""
         response = self.processor.process_query(refinement_prompt)
+        return {"messages": [AIMessage(content=response['choices'][0]['message']['content'])]}
     def _quality_check(self, state: AgentState) -> str:
         return "valid" if state.get("context", {}).get("valid", False) else "invalid"
+    def _validate_analysis(self, content: str):
         required_sections = [
+            "Key Innovations",
+            "Methodologies",
             "Empirical Results",
             "Applications",
             "Limitations"
         ]
         missing = [s for s in required_sections if f"## {s}" not in content]
         if missing:
+            raise ValueError(f"Missing sections: {', '.join(missing)}")
         if not re.search(r"\$.*?\$", content):
+            raise ValueError("Analysis lacks mathematical notation")
     def _error_state(self, message: str) -> Dict:
         return {
         }
 # ------------------------------
+# Streamlit Interface
 # ------------------------------
 class ResearchInterface:
     def __init__(self):
         self.workflow = ResearchWorkflow()
+        self._initialize()
+    def _initialize(self):
         st.set_page_config(
+            page_title="AI Research Assistant",
             layout="wide",
             initial_sidebar_state="expanded"
         )
         self._inject_styles()
         self._build_sidebar()
+        self._build_main()
     def _inject_styles(self):
         st.markdown("""
             --primary: #2ecc71;
             --secondary: #3498db;
             --background: #0a0a0a;
         }
         .stApp {
             background: var(--background);
+            color: white;
         }
         .stTextArea textarea {
             background: #1a1a1a !important;
+            border: 2px solid var(--secondary) !important;
         }
         code {
+            color: var(--primary);
             background: #002200;
             padding: 2px 4px;
         }
         </style>
         """, unsafe_allow_html=True)
     def _build_sidebar(self):
         with st.sidebar:
+            st.title("🔬 Research Corpus")
             for key, data in ResearchConfig.DOCUMENT_MAP.items():
                 with st.expander(data["title"]):
+                    st.markdown(f"```latex\n{data['content']}\n```")
+            st.metric("Vector DB Size", len(research_docs.get()['ids']))
+    def _build_main(self):
+        st.title("🧠 AI Research Analyst")
+        query = st.text_area("Research Query:", height=150,
+                           placeholder="Enter technical question...")
+        if st.button("Analyze", type="primary"):
             self._execute_analysis(query)
     def _execute_analysis(self, query: str):
         try:
+            with st.spinner("Analyzing research corpus..."):
                 result = self.workflow.app.invoke(
                     {"messages": [HumanMessage(content=query)]}
                 )
                 if result.get("context", {}).get("error"):
+                    self._show_error(result["context"]["error"])
                 else:
+                    self._display_result(result)
         except Exception as e:
             self._show_error(str(e))
+    def _display_result(self, result):
+        with st.expander("Technical Report", expanded=True):
+            st.markdown(result["messages"][-1].content)
+        with st.expander("Source Excerpts", expanded=False):
+            for doc in result["context"].get("docs", []):
                 st.markdown(f"**{doc.metadata['title']}**")
                 st.code(doc.page_content, language='latex')
     def _show_error(self, message):
         st.error(f"""
+        ⚠️ Analysis Failed
+        {message}
+        Mitigation Steps:
+        1. Simplify query complexity
+        2. Check document connections
+        3. Verify technical terms
         """)
 if __name__ == "__main__":