import re
from typing import Tuple
from uuid import UUID

from chromadb.db.base import SqlDB
from chromadb.segment import SegmentManager, VectorReader

topic_regex = r"persistent:\/\/(?P<tenant>.+)\/(?P<namespace>.+)\/(?P<topic>.+)"


def parse_topic_name(topic_name: str) -> Tuple[str, str, str]:
    """Parse a topic name of the form persistent://<tenant>/<namespace>/<topic>
    into its tenant, namespace and topic components."""
    match = re.match(topic_regex, topic_name)
    if not match:
        raise ValueError(f"Invalid topic name: {topic_name}")
    return match.group("tenant"), match.group("namespace"), match.group("topic")


def create_topic_name(tenant: str, namespace: str, collection_id: UUID) -> str:
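    """Build the persistent topic name for a collection from its tenant, namespace and collection id."""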
    return f"persistent://{tenant}/{namespace}/{str(collection_id)}"


def trigger_vector_segments_max_seq_id_migration(
    db: SqlDB, segment_manager: SegmentManager
) -> None:
    """
    Trigger the migration of vector segments' max_seq_id from the pickled metadata
    file to SQLite.

    Vector segments migrate this field automatically on init, so this should be used
    when we know segments are likely unmigrated and unloaded.

    This is a no-op if all vector segments have already migrated their max_seq_id.
    """
    with db.tx() as cur:
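        # Find collections whose local-persisted HNSW vector segments have no entry
        # in the max_seq_id table, i.e. segments that have not yet been migrated.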
        cur.execute(
            """
            SELECT collection
            FROM "segments"
            WHERE "id" NOT IN (SELECT "segment_id" FROM "max_seq_id") AND
                  "type" = 'urn:chroma:segment/vector/hnsw-local-persisted'
            """
        )
        collection_ids_with_unmigrated_segments = [row[0] for row in cur.fetchall()]

    if len(collection_ids_with_unmigrated_segments) == 0:
        return

    for collection_id in collection_ids_with_unmigrated_segments:
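        # Getting the segment loads it, which runs the segment's init-time migration
        # of max_seq_id into SQLite.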
        segment_manager.get_segment(UUID(collection_id), VectorReader)
|