File size: 1,664 Bytes
03a7adf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import chromadb
from chromadb.utils import embedding_functions
from typing import Optional, Dict
def createVectorDB(
    collection_name: Optional[str],
    chroma_data_path: Optional[str] = None,
    embed_model: Optional[str] = "all-MiniLM-L6-v2",
    metadata: Optional[Dict[str, str]] = None
) -> chromadb.Collection:
    """Create a Chroma collection backed by a persistent on-disk client.

    Args:
        collection_name: Name of the collection to create. Must be a
            non-empty string.
        chroma_data_path: Directory handed to ``chromadb.PersistentClient``.
            When ``None``, the relative path ``"CHROMA_EMBEDDINGS_PATH"``
            is used.
        embed_model: Model name passed to
            ``SentenceTransformerEmbeddingFunction``. ``None`` falls back
            to ``"all-MiniLM-L6-v2"``.
        metadata: Collection metadata. When ``None``,
            ``{"hnsw:space": "cosine"}`` is used.

    Returns:
        The collection object returned by ``client.create_collection``.

    Raises:
        ValueError: If ``collection_name`` is ``None`` or empty.

    Note:
        ``create_collection`` is used rather than a get-or-create call, so
        the underlying client is expected to reject a name that already
        exists (see the Chroma client documentation to confirm).
    """
    # Fail fast, before the persistent client touches the filesystem.
    if not collection_name:
        raise ValueError("collection_name must be a non-empty string")

    if chroma_data_path is None:
        chroma_data_path = r"CHROMA_EMBEDDINGS_PATH"  # Default path if not provided
    if embed_model is None:
        # The signature allows None; use the same model as the default argument.
        embed_model = "all-MiniLM-L6-v2"
    if metadata is None:
        # Default to cosine distance for the HNSW index.
        metadata = {"hnsw:space": "cosine"}

    client = chromadb.PersistentClient(path=chroma_data_path)
    embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name=embed_model
    )

    # The client API names this parameter ``name``; passing
    # ``collection_name=`` raises TypeError.
    collection = client.create_collection(
        name=collection_name,
        embedding_function=embedding_func,
        metadata=metadata,
    )
    return collection
# TODO: add a unit test for createVectorDB. Example call:
# collection = createVectorDB(
#     collection_name="123456789",
#     chroma_data_path=r"C:\Users\navan\Downloads\BioModelsRAG\CHROMA_EMBEDDINGS_PATH",
#     embed_model="all-MiniLM-L6-v2",
#     metadata={"hnsw:space": "cosine"},
# )
|