Spaces:
Running
on
Zero
Running
on
Zero
jedick
committed on
Commit
·
9ac80a4
1
Parent(s):
885cffb
Update email database for July 2025
Browse files
- app.py +10 -6
- data.py +1 -1
- mods/langchain_chroma.py +3 -3
app.py
CHANGED
@@ -21,12 +21,16 @@ from graph import BuildGraph
|
|
21 |
load_dotenv(dotenv_path=".env", override=True)
|
22 |
|
23 |
# Download model snapshots from Hugging Face Hub
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
print(f"
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Download and extract data if data directory is not present
|
32 |
if not os.path.isdir(db_dir):
|
|
|
21 |
load_dotenv(dotenv_path=".env", override=True)
|
22 |
|
23 |
# Download model snapshots from Hugging Face Hub
|
24 |
+
if torch.cuda.is_available():
|
25 |
+
print(f"Downloading checkpoints for {model_id}...")
|
26 |
+
ckpt_dir = snapshot_download(model_id, local_dir_use_symlinks=False)
|
27 |
+
print(f"Using checkpoints from {ckpt_dir}")
|
28 |
+
print(f"Downloading checkpoints for {embedding_model_id}...")
|
29 |
+
embedding_ckpt_dir = snapshot_download(embedding_model_id, local_dir_use_symlinks=False)
|
30 |
+
print(f"Using embedding checkpoints from {embedding_ckpt_dir}")
|
31 |
+
else:
|
32 |
+
ckpt_dir = None
|
33 |
+
embedding_ckpt_dir = None
|
34 |
|
35 |
# Download and extract data if data directory is not present
|
36 |
if not os.path.isdir(db_dir):
|
data.py
CHANGED
@@ -45,7 +45,7 @@ def download_data():
|
|
45 |
|
46 |
if not os.path.exists("db.zip"):
|
47 |
# For S3 (need AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET)
|
48 |
-
download_file_from_bucket("r-help-chat", "
|
49 |
## For Dropbox (shared file - key is in URL)
|
50 |
# shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
|
51 |
# output_filename = "db.zip"
|
|
|
45 |
|
46 |
if not os.path.exists("db.zip"):
|
47 |
# For S3 (need AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET)
|
48 |
+
download_file_from_bucket("r-help-chat", "db_20250801.zip", "db.zip")
|
49 |
## For Dropbox (shared file - key is in URL)
|
50 |
# shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
|
51 |
# output_filename = "db.zip"
|
mods/langchain_chroma.py
CHANGED
@@ -470,9 +470,6 @@ class Chroma(VectorStore):
|
|
470 |
|
471 |
See more: https://docs.trychroma.com/reference/py-collection#query
|
472 |
"""
|
473 |
-
# Possible fix for ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')
|
474 |
-
# https://github.com/langchain-ai/langchain/issues/26884
|
475 |
-
chromadb.api.client.SharedSystemClient.clear_system_cache()
|
476 |
return self._collection.query(
|
477 |
query_texts=query_texts,
|
478 |
query_embeddings=query_embeddings, # type: ignore[arg-type]
|
@@ -481,6 +478,9 @@ class Chroma(VectorStore):
|
|
481 |
where_document=where_document, # type: ignore[arg-type]
|
482 |
**kwargs,
|
483 |
)
|
|
|
|
|
|
|
484 |
|
485 |
@staticmethod
|
486 |
def encode_image(uri: str) -> str:
|
|
|
470 |
|
471 |
See more: https://docs.trychroma.com/reference/py-collection#query
|
472 |
"""
|
|
|
|
|
|
|
473 |
return self._collection.query(
|
474 |
query_texts=query_texts,
|
475 |
query_embeddings=query_embeddings, # type: ignore[arg-type]
|
|
|
478 |
where_document=where_document, # type: ignore[arg-type]
|
479 |
**kwargs,
|
480 |
)
|
481 |
+
# Possible fix for ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')
|
482 |
+
# https://github.com/langchain-ai/langchain/issues/26884
|
483 |
+
chromadb.api.client.SharedSystemClient.clear_system_cache()
|
484 |
|
485 |
@staticmethod
|
486 |
def encode_image(uri: str) -> str:
|