jedick committed on
Commit
9ac80a4
·
1 Parent(s): 885cffb

Update email database for July 2025

Browse files
Files changed (3) hide show
  1. app.py +10 -6
  2. data.py +1 -1
  3. mods/langchain_chroma.py +3 -3
app.py CHANGED
@@ -21,12 +21,16 @@ from graph import BuildGraph
21
  load_dotenv(dotenv_path=".env", override=True)
22
 
23
  # Download model snapshots from Hugging Face Hub
24
- print(f"Downloading checkpoints for {model_id}...")
25
- ckpt_dir = snapshot_download(model_id, local_dir_use_symlinks=False)
26
- print(f"Using checkpoints from {ckpt_dir}")
27
- print(f"Downloading checkpoints for {embedding_model_id}...")
28
- embedding_ckpt_dir = snapshot_download(embedding_model_id, local_dir_use_symlinks=False)
29
- print(f"Using embedding checkpoints from {embedding_ckpt_dir}")
 
 
 
 
30
 
31
  # Download and extract data if data directory is not present
32
  if not os.path.isdir(db_dir):
 
21
  load_dotenv(dotenv_path=".env", override=True)
22
 
23
  # Download model snapshots from Hugging Face Hub
24
+ if torch.cuda.is_available():
25
+ print(f"Downloading checkpoints for {model_id}...")
26
+ ckpt_dir = snapshot_download(model_id, local_dir_use_symlinks=False)
27
+ print(f"Using checkpoints from {ckpt_dir}")
28
+ print(f"Downloading checkpoints for {embedding_model_id}...")
29
+ embedding_ckpt_dir = snapshot_download(embedding_model_id, local_dir_use_symlinks=False)
30
+ print(f"Using embedding checkpoints from {embedding_ckpt_dir}")
31
+ else:
32
+ ckpt_dir = None
33
+ embedding_ckpt_dir = None
34
 
35
  # Download and extract data if data directory is not present
36
  if not os.path.isdir(db_dir):
data.py CHANGED
@@ -45,7 +45,7 @@ def download_data():
45
 
46
  if not os.path.exists("db.zip"):
47
  # For S3 (need AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET)
48
- download_file_from_bucket("r-help-chat", "db.zip", "db.zip")
49
  ## For Dropbox (shared file - key is in URL)
50
  # shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
51
  # output_filename = "db.zip"
 
45
 
46
  if not os.path.exists("db.zip"):
47
  # For S3 (need AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET)
48
+ download_file_from_bucket("r-help-chat", "db_20250801.zip", "db.zip")
49
  ## For Dropbox (shared file - key is in URL)
50
  # shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
51
  # output_filename = "db.zip"
mods/langchain_chroma.py CHANGED
@@ -470,9 +470,6 @@ class Chroma(VectorStore):
470
 
471
  See more: https://docs.trychroma.com/reference/py-collection#query
472
  """
473
- # Possible fix for ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')
474
- # https://github.com/langchain-ai/langchain/issues/26884
475
- chromadb.api.client.SharedSystemClient.clear_system_cache()
476
  return self._collection.query(
477
  query_texts=query_texts,
478
  query_embeddings=query_embeddings, # type: ignore[arg-type]
@@ -481,6 +478,9 @@ class Chroma(VectorStore):
481
  where_document=where_document, # type: ignore[arg-type]
482
  **kwargs,
483
  )
 
 
 
484
 
485
  @staticmethod
486
  def encode_image(uri: str) -> str:
 
470
 
471
  See more: https://docs.trychroma.com/reference/py-collection#query
472
  """
 
 
 
473
  return self._collection.query(
474
  query_texts=query_texts,
475
  query_embeddings=query_embeddings, # type: ignore[arg-type]
 
478
  where_document=where_document, # type: ignore[arg-type]
479
  **kwargs,
480
  )
481
+ # Possible fix for ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')
482
+ # https://github.com/langchain-ai/langchain/issues/26884
483
+ chromadb.api.client.SharedSystemClient.clear_system_cache()
484
 
485
  @staticmethod
486
  def encode_image(uri: str) -> str: