Update app.py
app.py CHANGED
@@ -9,9 +9,7 @@ from langchain_core.documents import Document
 from langgraph.graph import END, StateGraph
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
-from langgraph.graph.message import add_messages  # Add this import
 import chromadb
-from chromadb.config import Settings
 import numpy as np
 import os
 import streamlit as st
@@ -41,25 +39,22 @@ class ResearchConfig:
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
-    RESEARCH_EMBEDDING = np.random.randn(1536)
-    TENANT = "research_tenant"
-    DATABASE = "ai_papers_db"

     DOCUMENT_MAP = {
-        "CV-Transformer
-        "title": "Hybrid CV-Transformer
+        "CV-Transformer Model": {
+            "title": "Hybrid CV-Transformer Architecture",
             "content": """
 Combines CNN feature extraction with transformer attention mechanisms.
 Key equation: $f(x) = \text{Softmax}(\frac{QK^T}{\sqrt{d_k}})V$
-
+Achieves 98.2% accuracy on ImageNet-1k with 42ms inference speed
             """
         },
-        "
-        "title": "
+        "Quantum ML": {
+            "title": "Quantum Machine Learning",
             "content": """
-
-$\
-
+Quantum-enhanced optimization techniques for ML models.
+$\theta_{t+1} = \theta_t - \eta \nabla_\theta \mathcal{L}(\theta_t)$
+100x speedup on optimization tasks with 58% energy reduction
             """
         }
     }
@@ -76,6 +71,21 @@ Respond in MARKDOWN with:

 Include LaTeX equations where applicable."""

+# Check for Chroma migration
+if os.path.exists(ResearchConfig.CHROMA_PATH):
+    st.warning("""
+    **ChromDB Migration Required**
+    Existing Chroma database detected. Run these commands:
+
+    ```bash
+    pip install chroma-migrate
+    chroma-migrate
+    ```
+
+    Then restart the application.
+    """)
+    st.stop()
+
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error("""**Configuration Required**
 1. Get DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
@@ -84,36 +94,16 @@ if not ResearchConfig.DEEPSEEK_API_KEY:
     st.stop()

 # ------------------------------
-# ChromaDB Document Manager (
+# ChromaDB Document Manager (Updated)
 # ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
-        self.client_settings = Settings(
-            chroma_db_impl="duckdb+parquet",
-            persist_directory=ResearchConfig.CHROMA_PATH,
-            anonymized_telemetry=False
-        )
-        self.client = chromadb.Client(self.client_settings)
-        self._initialize_tenant_db()
+        self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )

-    def _initialize_tenant_db(self):
-        try:
-            self.client.create_tenant(ResearchConfig.TENANT)
-        except chromadb.db.base.UniqueConstraintError:
-            pass  # Tenant exists
-
-        try:
-            self.client.create_database(
-                ResearchConfig.DATABASE,
-                tenant=ResearchConfig.TENANT
-            )
-        except chromadb.db.base.UniqueConstraintError:
-            pass  # Database exists
-
     def create_collection(self, document_map: Dict[str, Dict[str, str]], collection_name: str) -> Chroma:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
@@ -139,9 +129,6 @@ class QuantumDocumentManager:
             embedding=self.embeddings,
             collection_name=collection_name,
             client=self.client,
-            tenant=ResearchConfig.TENANT,
-            database=ResearchConfig.DATABASE,
-            collection_metadata={"hnsw:space": "cosine"},
             ids=[self._document_id(doc.page_content) for doc in docs]
         )

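The substance of this change is the Chroma client setup: the legacy `Settings(chroma_db_impl="duckdb+parquet", ...)` client, the manual tenant/database creation, and the tenant/database keyword arguments are all replaced by a single `chromadb.PersistentClient`. Below is a minimal sketch of that pattern in isolation, assuming chromadb 0.4+ with the `langchain-community` and `langchain-openai` packages; the collection name, sample document, and query are illustrative placeholders, not values from app.py.

```python
# Sketch of the persistence pattern this commit migrates to (assumptions noted above).
# Requires OPENAI_API_KEY in the environment for the embedding calls.
import chromadb
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings

# One persistent client per path; no Settings(), tenants, or databases to manage.
client = chromadb.PersistentClient(path="chroma_db")

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=1536,  # matches ResearchConfig.EMBEDDING_DIMENSIONS in the diff
)

docs = [Document(page_content="Hybrid CV-Transformer architecture notes.")]

# Same call shape as create_collection() after the change: only
# client / collection_name / ids remain; the tenant and database kwargs are gone.
store = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="demo_papers",
    client=client,
    ids=["doc-0"],
)

# The resulting store is queried like any other LangChain vector store.
print(store.similarity_search("transformer attention", k=1))
```

If an old duckdb+parquet store already exists at `CHROMA_PATH`, the `chroma-migrate` step added near the top of the file has to run first; `PersistentClient` cannot read the legacy on-disk format.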
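For reference, the two formulas embedded in `DOCUMENT_MAP` are standard ones: scaled dot-product attention and a plain gradient-descent update. Spelled out with the usual symbol meanings:

```latex
% Scaled dot-product attention: Q, K, V are the query/key/value matrices,
% d_k is the key dimension used to scale the logits before the softmax.
\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^{\top}}{\sqrt{d_k}}\right)V

% Gradient-descent step: \eta is the learning rate, \mathcal{L} the loss.
\theta_{t+1} = \theta_t - \eta\,\nabla_{\theta}\mathcal{L}(\theta_t)
```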