Update app.py
app.py CHANGED
@@ -9,9 +9,7 @@ from langchain_core.documents import Document
 from langgraph.graph import END, StateGraph
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
-from langgraph.graph.message import add_messages  # Add this import
 import chromadb
-from chromadb.config import Settings
 import numpy as np
 import os
 import streamlit as st
@@ -41,25 +39,22 @@ class ResearchConfig:
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
-    RESEARCH_EMBEDDING = np.random.randn(1536)
-    TENANT = "research_tenant"
-    DATABASE = "ai_papers_db"

     DOCUMENT_MAP = {
-        "CV-Transformer
-        "title": "Hybrid CV-Transformer
+        "CV-Transformer Model": {
+            "title": "Hybrid CV-Transformer Architecture",
             "content": """
 Combines CNN feature extraction with transformer attention mechanisms.
 Key equation: $f(x) = \text{Softmax}(\frac{QK^T}{\sqrt{d_k}})V$
-
+Achieves 98.2% accuracy on ImageNet-1k with 42ms inference speed
             """
         },
-        "
-        "title": "
+        "Quantum ML": {
+            "title": "Quantum Machine Learning",
             "content": """
-
-$\
-
+Quantum-enhanced optimization techniques for ML models.
+$\theta_{t+1} = \theta_t - \eta \nabla_\theta \mathcal{L}(\theta_t)$
+100x speedup on optimization tasks with 58% energy reduction
             """
         }
     }
@@ -76,6 +71,21 @@ Respond in MARKDOWN with:

 Include LaTeX equations where applicable."""

+# Check for Chroma migration
+if os.path.exists(ResearchConfig.CHROMA_PATH):
+    st.warning("""
+    **ChromDB Migration Required**
+    Existing Chroma database detected. Run these commands:
+
+    ```bash
+    pip install chroma-migrate
+    chroma-migrate
+    ```
+
+    Then restart the application.
+    """)
+    st.stop()
+
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error("""**Configuration Required**
 1. Get DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
@@ -84,36 +94,16 @@ if not ResearchConfig.DEEPSEEK_API_KEY:
     st.stop()

 # ------------------------------
-# ChromaDB Document Manager (
+# ChromaDB Document Manager (Updated)
 # ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
-        self.client_settings = Settings(
-            chroma_db_impl="duckdb+parquet",
-            persist_directory=ResearchConfig.CHROMA_PATH,
-            anonymized_telemetry=False
-        )
-        self.client = chromadb.Client(self.client_settings)
-        self._initialize_tenant_db()
+        self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )

-    def _initialize_tenant_db(self):
-        try:
-            self.client.create_tenant(ResearchConfig.TENANT)
-        except chromadb.db.base.UniqueConstraintError:
-            pass  # Tenant exists
-
-        try:
-            self.client.create_database(
-                ResearchConfig.DATABASE,
-                tenant=ResearchConfig.TENANT
-            )
-        except chromadb.db.base.UniqueConstraintError:
-            pass  # Database exists
-
     def create_collection(self, document_map: Dict[str, Dict[str, str]], collection_name: str) -> Chroma:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
@@ -139,9 +129,6 @@ class QuantumDocumentManager:
             embedding=self.embeddings,
             collection_name=collection_name,
             client=self.client,
-            tenant=ResearchConfig.TENANT,
-            database=ResearchConfig.DATABASE,
-            collection_metadata={"hnsw:space": "cosine"},
             ids=[self._document_id(doc.page_content) for doc in docs]
         )

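The substance of this change is the Chroma client setup: the legacy `Settings(chroma_db_impl="duckdb+parquet", ...)` client, the manual tenant/database creation, and the tenant/database keyword arguments are all replaced by a single `chromadb.PersistentClient`. Below is a minimal sketch of that pattern in isolation, assuming chromadb 0.4+ with the `langchain-community` and `langchain-openai` packages; the collection name, sample document, and query are illustrative placeholders, not values from app.py.

```python
# Sketch of the persistence pattern this commit migrates to (assumptions noted above).
# Requires OPENAI_API_KEY in the environment for the embedding calls.
import chromadb
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings

# One persistent client per path; no Settings(), tenants, or databases to manage.
client = chromadb.PersistentClient(path="chroma_db")

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=1536,  # matches ResearchConfig.EMBEDDING_DIMENSIONS in the diff
)

docs = [Document(page_content="Hybrid CV-Transformer architecture notes.")]

# Same call shape as create_collection() after the change: only
# client / collection_name / ids remain; the tenant and database kwargs are gone.
store = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="demo_papers",
    client=client,
    ids=["doc-0"],
)

# The resulting store is queried like any other LangChain vector store.
print(store.similarity_search("transformer attention", k=1))
```

If an old duckdb+parquet store already exists at `CHROMA_PATH`, the `chroma-migrate` step added near the top of the file has to run first; `PersistentClient` cannot read the legacy on-disk format.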
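For reference, the two formulas embedded in `DOCUMENT_MAP` are standard ones: scaled dot-product attention and a plain gradient-descent update. Spelled out with the usual symbol meanings:

```latex
% Scaled dot-product attention: Q, K, V are the query/key/value matrices,
% d_k is the key dimension used to scale the logits before the softmax.
\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^{\top}}{\sqrt{d_k}}\right)V

% Gradient-descent step: \eta is the learning rate, \mathcal{L} the loss.
\theta_{t+1} = \theta_t - \eta\,\nabla_{\theta}\mathcal{L}(\theta_t)
```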