davanstrien HF staff committed on
Commit
79269c4
·
1 Parent(s): 4d0e134
Files changed (1) hide show
  1. utils.py +48 -0
utils.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import platform
4
+ from typing import Literal
5
+
6
+ import chromadb
7
+ from dotenv import load_dotenv
8
+
9
# Module-level setup. This sequence previously appeared twice back to back;
# the second pass had no additional effect (basicConfig does nothing once the
# root logger has handlers, and the other statements are idempotent), so it
# is performed exactly once here.

# Set before anything else in this module reads it.
# NOTE(review): presumably switches huggingface_hub to the hf_transfer
# backend -- confirm against the huggingface_hub environment-variable docs.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Set up logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# Pull settings from a .env file into the process environment.
load_dotenv()
27
+
28
+
29
def get_save_path() -> Literal["chroma/"] | Literal["/data/chroma/"]:
    """Choose the directory used for Chroma persistence.

    Returns:
        ``"chroma/"`` when ``platform.system()`` reports ``"Darwin"``,
        otherwise ``"/data/chroma/"``. The selected path is also logged
        at INFO level.
    """
    if platform.system() == "Darwin":
        path = "chroma/"
    else:
        path = "/data/chroma/"
    logger.info(f"Using save path: {path}")
    return path
33
+
34
+
35
def get_chroma_client():
    """Build a ``chromadb.PersistentClient`` for the configured save path.

    Logs the initialization first, then resolves the storage directory via
    ``get_save_path()`` and passes it to the client as ``path``.

    Returns:
        The object returned by ``chromadb.PersistentClient``.
    """
    logger.info("Initializing Chroma client")
    storage_dir = get_save_path()
    client = chromadb.PersistentClient(path=storage_dir)
    return client
39
+
40
+
41
def get_collection(chroma_client, embedding_function, collection_name):
    """Request a named collection from the given Chroma client.

    Args:
        chroma_client: Object exposing ``create_collection``.
        embedding_function: Forwarded unchanged as ``embedding_function``.
        collection_name: Forwarded as ``name`` and included in the log line.

    Returns:
        Whatever ``chroma_client.create_collection`` returns. The call is
        made with ``get_or_create=True`` and the metadata
        ``{"hnsw:space": "cosine"}``.
    """
    logger.info(f"Getting or creating collection: {collection_name}")
    collection_options = {
        "name": collection_name,
        "get_or_create": True,
        "embedding_function": embedding_function,
        "metadata": {"hnsw:space": "cosine"},
    }
    return chroma_client.create_collection(**collection_options)