mgbam committed on
Commit
9581cc6
·
verified ·
1 Parent(s): 812a612

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -38
app.py CHANGED
@@ -9,9 +9,7 @@ from langchain_core.documents import Document
9
  from langgraph.graph import END, StateGraph
10
  from typing_extensions import TypedDict, Annotated
11
  from typing import Sequence, Dict, List, Optional, Any
12
- from langgraph.graph.message import add_messages # Add this import
13
  import chromadb
14
- from chromadb.config import Settings
15
  import numpy as np
16
  import os
17
  import streamlit as st
@@ -41,25 +39,22 @@ class ResearchConfig:
41
  CHUNK_OVERLAP = 64
42
  MAX_CONCURRENT_REQUESTS = 5
43
  EMBEDDING_DIMENSIONS = 1536
44
- RESEARCH_EMBEDDING = np.random.randn(1536)
45
- TENANT = "research_tenant"
46
- DATABASE = "ai_papers_db"
47
 
48
  DOCUMENT_MAP = {
49
- "CV-Transformer Hybrid Architecture": {
50
- "title": "Hybrid CV-Transformer Model (98% Accuracy)",
51
  "content": """
52
  Combines CNN feature extraction with transformer attention mechanisms.
53
  Key equation: $f(x) = \text{Softmax}(\frac{QK^T}{\sqrt{d_k}})V$
54
- ImageNet-1k: 98.2% Top-1 Accuracy, 42ms/inference
55
  """
56
  },
57
- "Transformer Architecture Analysis": {
58
- "title": "Transformer Architectures in NLP",
59
  "content": """
60
- Self-attention mechanisms enable parallel processing of sequences.
61
- $\text{Attention}(Q, K, V) = \text{softmax}(\frac{QK^T}{\sqrt{d_k}})V$
62
- GLUE Score: 92.4%, Training Efficiency: 1.8x vs RNNs
63
  """
64
  }
65
  }
@@ -76,6 +71,21 @@ Respond in MARKDOWN with:
76
 
77
  Include LaTeX equations where applicable."""
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  if not ResearchConfig.DEEPSEEK_API_KEY:
80
  st.error("""**Configuration Required**
81
  1. Get DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
@@ -84,36 +94,16 @@ if not ResearchConfig.DEEPSEEK_API_KEY:
84
  st.stop()
85
 
86
  # ------------------------------
87
- # ChromaDB Document Manager (Fixed)
88
  # ------------------------------
89
  class QuantumDocumentManager:
90
  def __init__(self):
91
- self.client_settings = Settings(
92
- chroma_db_impl="duckdb+parquet",
93
- persist_directory=ResearchConfig.CHROMA_PATH,
94
- anonymized_telemetry=False
95
- )
96
- self.client = chromadb.Client(self.client_settings)
97
- self._initialize_tenant_db()
98
  self.embeddings = OpenAIEmbeddings(
99
  model="text-embedding-3-large",
100
  dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
101
  )
102
 
103
- def _initialize_tenant_db(self):
104
- try:
105
- self.client.create_tenant(ResearchConfig.TENANT)
106
- except chromadb.db.base.UniqueConstraintError:
107
- pass # Tenant exists
108
-
109
- try:
110
- self.client.create_database(
111
- ResearchConfig.DATABASE,
112
- tenant=ResearchConfig.TENANT
113
- )
114
- except chromadb.db.base.UniqueConstraintError:
115
- pass # Database exists
116
-
117
  def create_collection(self, document_map: Dict[str, Dict[str, str]], collection_name: str) -> Chroma:
118
  splitter = RecursiveCharacterTextSplitter(
119
  chunk_size=ResearchConfig.CHUNK_SIZE,
@@ -139,9 +129,6 @@ class QuantumDocumentManager:
139
  embedding=self.embeddings,
140
  collection_name=collection_name,
141
  client=self.client,
142
- tenant=ResearchConfig.TENANT,
143
- database=ResearchConfig.DATABASE,
144
- collection_metadata={"hnsw:space": "cosine"},
145
  ids=[self._document_id(doc.page_content) for doc in docs]
146
  )
147
 
 
9
  from langgraph.graph import END, StateGraph
10
  from typing_extensions import TypedDict, Annotated
11
  from typing import Sequence, Dict, List, Optional, Any
 
12
  import chromadb
 
13
  import numpy as np
14
  import os
15
  import streamlit as st
 
39
  CHUNK_OVERLAP = 64
40
  MAX_CONCURRENT_REQUESTS = 5
41
  EMBEDDING_DIMENSIONS = 1536
 
 
 
42
 
43
  DOCUMENT_MAP = {
44
+ "CV-Transformer Model": {
45
+ "title": "Hybrid CV-Transformer Architecture",
46
  "content": """
47
  Combines CNN feature extraction with transformer attention mechanisms.
48
  Key equation: $f(x) = \text{Softmax}(\frac{QK^T}{\sqrt{d_k}})V$
49
+ Achieves 98.2% accuracy on ImageNet-1k with 42ms inference speed
50
  """
51
  },
52
+ "Quantum ML": {
53
+ "title": "Quantum Machine Learning",
54
  "content": """
55
+ Quantum-enhanced optimization techniques for ML models.
56
+ $\theta_{t+1} = \theta_t - \eta \nabla_\theta \mathcal{L}(\theta_t)$
57
+ 100x speedup on optimization tasks with 58% energy reduction
58
  """
59
  }
60
  }
 
71
 
72
  Include LaTeX equations where applicable."""
73
 
74
+ # Check for Chroma migration
75
+ if os.path.exists(ResearchConfig.CHROMA_PATH):
76
+ st.warning("""
77
+ **ChromDB Migration Required**
78
+ Existing Chroma database detected. Run these commands:
79
+
80
+ ```bash
81
+ pip install chroma-migrate
82
+ chroma-migrate
83
+ ```
84
+
85
+ Then restart the application.
86
+ """)
87
+ st.stop()
88
+
89
  if not ResearchConfig.DEEPSEEK_API_KEY:
90
  st.error("""**Configuration Required**
91
  1. Get DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
 
94
  st.stop()
95
 
96
  # ------------------------------
97
+ # ChromaDB Document Manager (Updated)
98
  # ------------------------------
99
  class QuantumDocumentManager:
100
  def __init__(self):
101
+ self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
 
 
 
 
 
 
102
  self.embeddings = OpenAIEmbeddings(
103
  model="text-embedding-3-large",
104
  dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
105
  )
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  def create_collection(self, document_map: Dict[str, Dict[str, str]], collection_name: str) -> Chroma:
108
  splitter = RecursiveCharacterTextSplitter(
109
  chunk_size=ResearchConfig.CHUNK_SIZE,
 
129
  embedding=self.embeddings,
130
  collection_name=collection_name,
131
  client=self.client,
 
 
 
132
  ids=[self._document_id(doc.page_content) for doc in docs]
133
  )
134