|
from typing import ( |
|
TYPE_CHECKING, |
|
Optional, |
|
Union, |
|
) |
|
import numpy as np |
|
|
|
from chromadb.api.types import ( |
|
URI, |
|
CollectionMetadata, |
|
Embedding, |
|
PyEmbedding, |
|
Include, |
|
Metadata, |
|
Document, |
|
Image, |
|
Where, |
|
IDs, |
|
GetResult, |
|
QueryResult, |
|
ID, |
|
OneOrMany, |
|
WhereDocument, |
|
) |
|
|
|
from chromadb.api.models.CollectionCommon import CollectionCommon |
|
|
|
if TYPE_CHECKING: |
|
from chromadb.api import AsyncServerAPI |
|
|
|
|
|
class AsyncCollection(CollectionCommon["AsyncServerAPI"]):
    """Asynchronous handle on a single collection.

    Each coroutine awaits the matching private call on ``self._client`` for
    the collection identified by ``self.id``. Where arguments need checking,
    they are first passed through a ``_validate_*`` helper available on
    ``self`` (none of those helpers are defined in this class body).
    """

    async def add(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[OneOrMany[Embedding], OneOrMany[PyEmbedding]]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Add embeddings to the data store.

        Args:
            ids: The ids of the embeddings you wish to add.
            embeddings: The embeddings to add. If None, embeddings will be
                computed based on the documents or images using the
                embedding_function set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When
                querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings.
                Optional.
            images: The images to associate with the embeddings. Optional.
            uris: The uris of the images to associate with the embeddings.
                Optional.

        Returns:
            None

        Raises:
            ValueError: If you don't provide either embeddings or documents
            ValueError: If the length of ids, embeddings, metadatas, or
                documents don't match
            ValueError: If you don't provide an embedding function and don't
                provide embeddings
            ValueError: If you provide both embeddings and documents
            ValueError: If you provide an id that already exists
        """
        prepared = self._validate_and_prepare_embedding_set(
            ids, embeddings, metadatas, documents, images, uris
        )
        # `images` is only an input to preparation: the prepared 5-tuple has
        # no image slot, and nothing image-related is sent to the client.
        (
            record_ids,
            record_embeddings,
            record_metadatas,
            record_documents,
            record_uris,
        ) = prepared

        await self._client._add(
            record_ids,
            self.id,
            record_embeddings,
            record_metadatas,
            record_documents,
            record_uris,
        )

    async def count(self) -> int:
        """Return the total number of embeddings added to the database.

        Returns:
            int: The total number of embeddings added to the database
        """
        total = await self._client._count(collection_id=self.id)
        return total

    async def get(
        self,
        ids: Optional[OneOrMany[ID]] = None,
        where: Optional[Where] = None,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = ["metadatas", "documents"],
    ) -> GetResult:
        """Get embeddings and their associated data from the data store.

        If no ids or where filter is provided, returns all embeddings up to
        limit starting at offset.

        Args:
            ids: The ids of the embeddings to get. Optional.
            where: A Where type dict used to filter results by. E.g.
                `{"$and": [{"color": "red"}, {"price": {"$gte": 4.20}}]}`.
                Optional.
            limit: The number of documents to return. Optional.
            offset: The offset to start returning results from. Useful for
                paging results with limit. Optional.
            where_document: A WhereDocument type dict used to filter by the
                documents. E.g. `{"$contains": "hello"}`. Optional.
            include: A list of what to include in the results. Can contain
                `"embeddings"`, `"metadatas"`, `"documents"`. Ids are always
                included. Defaults to `["metadatas", "documents"]`. Optional.

        Returns:
            GetResult: A GetResult object containing the results.
        """
        # NOTE(review): the default `include` list is created once and shared
        # across calls; confirm the validation helper never mutates it.
        prepared_ids, where_filter, document_filter, included_fields = (
            self._validate_and_prepare_get_request(ids, where, where_document, include)
        )

        raw_result = await self._client._get(
            self.id,
            prepared_ids,
            where_filter,
            # Fourth positional slot is deliberately passed as None.
            # NOTE(review): presumably the sort option — confirm against the
            # client API.
            None,
            limit,
            offset,
            where_document=document_filter,
            include=included_fields,
        )

        return self._transform_get_response(raw_result, included_fields)

    async def peek(self, limit: int = 10) -> GetResult:
        """Get the first few results in the database up to limit.

        Args:
            limit: The number of results to return.

        Returns:
            GetResult: A GetResult object containing the results.
        """
        raw_result = await self._client._peek(self.id, limit)
        return self._transform_peek_response(raw_result)

    async def query(
        self,
        query_embeddings: Optional[
            Union[OneOrMany[Embedding], OneOrMany[np.ndarray]]
        ] = None,
        query_texts: Optional[OneOrMany[Document]] = None,
        query_images: Optional[OneOrMany[Image]] = None,
        query_uris: Optional[OneOrMany[URI]] = None,
        n_results: int = 10,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = ["metadatas", "documents", "distances"],
    ) -> QueryResult:
        """Get the n_results nearest neighbor embeddings for the provided
        query_embeddings or query_texts.

        Args:
            query_embeddings: The embeddings to get the closest neighbors of.
                Optional.
            query_texts: The document texts to get the closest neighbors of.
                Optional.
            query_images: The images to get the closest neighbors of.
                Optional.
            query_uris: URIs to query by; handed to request validation
                together with the other query inputs. Optional.
            n_results: The number of neighbors to return for each
                query_embedding or query_texts. Optional.
            where: A Where type dict used to filter results by. E.g.
                `{"$and": [{"color": "red"}, {"price": {"$gte": 4.20}}]}`.
                Optional.
            where_document: A WhereDocument type dict used to filter by the
                documents. E.g. `{"$contains": "hello"}`. Optional.
            include: A list of what to include in the results. Can contain
                `"embeddings"`, `"metadatas"`, `"documents"`, `"distances"`.
                Ids are always included. Defaults to
                `["metadatas", "documents", "distances"]`. Optional.

        Returns:
            QueryResult: A QueryResult object containing the results.

        Raises:
            ValueError: If you don't provide either query_embeddings,
                query_texts, or query_images
            ValueError: If you provide both query_embeddings and query_texts
            ValueError: If you provide both query_embeddings and query_images
            ValueError: If you provide both query_texts and query_images
        """
        # NOTE(review): the default `include` list is created once and shared
        # across calls; confirm the validation helper never mutates it.
        prepared = self._validate_and_prepare_query_request(
            query_embeddings,
            query_texts,
            query_images,
            query_uris,
            n_results,
            where,
            where_document,
            include,
        )
        embeddings_to_search, neighbor_count, where_filter, document_filter = prepared

        # The preparation step returns no include value, so the caller's
        # `include` is forwarded as given to both the client and the
        # response transformation.
        raw_result = await self._client._query(
            collection_id=self.id,
            query_embeddings=embeddings_to_search,
            n_results=neighbor_count,
            where=where_filter,
            where_document=document_filter,
            include=include,
        )

        return self._transform_query_response(raw_result, include)

    async def modify(
        self, name: Optional[str] = None, metadata: Optional[CollectionMetadata] = None
    ) -> None:
        """Modify the collection name or metadata.

        Args:
            name: The updated name for the collection. Optional.
            metadata: The updated metadata for the collection. Optional.

        Returns:
            None
        """
        self._validate_modify_request(metadata)

        await self._client._modify(id=self.id, new_name=name, new_metadata=metadata)

        # Reached only when the awaited client call did not raise: bring the
        # local model in line with what was just sent.
        self._update_model_after_modify_success(name, metadata)

    async def update(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[OneOrMany[Embedding], OneOrMany[np.ndarray]]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Update the embeddings, metadatas or documents for provided ids.

        Args:
            ids: The ids of the embeddings to update.
            embeddings: The embeddings to update. If None, embeddings will be
                computed based on the documents or images using the
                embedding_function set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When
                querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings.
                Optional.
            images: The images to associate with the embeddings. Optional.
            uris: The uris to associate with the embeddings. Optional.

        Returns:
            None
        """
        prepared = self._validate_and_prepare_update_request(
            ids, embeddings, metadatas, documents, images, uris
        )
        (
            record_ids,
            record_embeddings,
            record_metadatas,
            record_documents,
            record_uris,
        ) = prepared

        # Unlike `_add`, the collection id comes first in this client call.
        await self._client._update(
            self.id,
            record_ids,
            record_embeddings,
            record_metadatas,
            record_documents,
            record_uris,
        )

    async def upsert(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[OneOrMany[Embedding], OneOrMany[np.ndarray]]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Update the embeddings, metadatas or documents for provided ids, or
        create them if they don't exist.

        Args:
            ids: The ids of the embeddings to update.
            embeddings: The embeddings to add. If None, embeddings will be
                computed based on the documents using the embedding_function
                set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When
                querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings.
                Optional.
            images: The images to associate with the embeddings. Optional.
            uris: The uris to associate with the embeddings. Optional.

        Returns:
            None
        """
        prepared = self._validate_and_prepare_upsert_request(
            ids, embeddings, metadatas, documents, images, uris
        )
        (
            record_ids,
            record_embeddings,
            record_metadatas,
            record_documents,
            record_uris,
        ) = prepared

        await self._client._upsert(
            collection_id=self.id,
            ids=record_ids,
            embeddings=record_embeddings,
            metadatas=record_metadatas,
            documents=record_documents,
            uris=record_uris,
        )

    async def delete(
        self,
        ids: Optional[IDs] = None,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
    ) -> None:
        """Delete the embeddings based on ids and/or a where filter.

        Args:
            ids: The ids of the embeddings to delete.
            where: A Where type dict used to filter the deletion by. E.g.
                `{"$and": [{"color": "red"}, {"price": {"$gte": 4.20}}]}`.
                Optional.
            where_document: A WhereDocument type dict used to filter the
                deletion by the document content. E.g.
                `{"$contains": "hello"}`. Optional.

        Returns:
            None

        Raises:
            ValueError: If you don't provide either ids, where, or
                where_document
        """
        target_ids, where_filter, document_filter = (
            self._validate_and_prepare_delete_request(ids, where, where_document)
        )

        await self._client._delete(self.id, target_ids, where_filter, document_filter)
|
|