Spaces:
Build error
Build error
import re | |
from typing import Tuple | |
from uuid import UUID | |
from chromadb.db.base import SqlDB | |
from chromadb.segment import SegmentManager, VectorReader | |
topic_regex = r"persistent:\/\/(?P<tenant>.+)\/(?P<namespace>.+)\/(?P<topic>.+)" | |
def parse_topic_name(topic_name: str) -> Tuple[str, str, str]: | |
"""Parse the topic name into the tenant, namespace and topic name""" | |
match = re.match(topic_regex, topic_name) | |
if not match: | |
raise ValueError(f"Invalid topic name: {topic_name}") | |
return match.group("tenant"), match.group("namespace"), match.group("topic") | |
def create_topic_name(tenant: str, namespace: str, collection_id: UUID) -> str: | |
return f"persistent://{tenant}/{namespace}/{str(collection_id)}" | |
def trigger_vector_segments_max_seq_id_migration( | |
db: SqlDB, segment_manager: SegmentManager | |
) -> None: | |
""" | |
Trigger the migration of vector segments' max_seq_id from the pickled metadata file to SQLite. | |
Vector segments migrate this field automatically on init—so this should be used when we know segments are likely unmigrated and unloaded. | |
This is a no-op if all vector segments have already migrated their max_seq_id. | |
""" | |
with db.tx() as cur: | |
cur.execute( | |
""" | |
SELECT collection | |
FROM "segments" | |
WHERE "id" NOT IN (SELECT "segment_id" FROM "max_seq_id") AND | |
"type" = 'urn:chroma:segment/vector/hnsw-local-persisted' | |
""" | |
) | |
collection_ids_with_unmigrated_segments = [row[0] for row in cur.fetchall()] | |
if len(collection_ids_with_unmigrated_segments) == 0: | |
return | |
for collection_id in collection_ids_with_unmigrated_segments: | |
# Loading the segment triggers the migration on init | |
segment_manager.get_segment(UUID(collection_id), VectorReader) | |