import hashlib | |
import uuid | |
from langchain.schema import Document | |
from qdrant_client import QdrantClient | |
from typing import List | |
def check_collection_exists(client: QdrantClient, collection_name: str) -> bool: | |
try: | |
# this is dumb, but it works. Not sure why get_collection raises an error if the collection doesn't exist. | |
client.get_collection(collection_name) is not None | |
return True | |
except ValueError: | |
return False | |
def get_document_hash_as_uuid(doc): | |
content_hash = hashlib.sha256(doc.page_content.encode()).hexdigest() | |
uuid_from_hash = uuid.UUID(content_hash[:32]) | |
return str(uuid_from_hash) | |
def enrich_document_metadata(doc: Document, **additional_metadata) -> Document: | |
doc.metadata.update(additional_metadata) | |
return doc | |