docker-test / llm_engineering /domain /embedded_chunks.py
SkazuHD's picture
init space
d660b02
from abc import ABC
from pydantic import UUID4, Field
from llm_engineering.domain.types import DataCategory
from .base import VectorBaseDocument
class EmbeddedChunk(VectorBaseDocument, ABC):
    """Shared fields for a chunk of text content and its embedding vector.

    The concrete subclasses in this module each add a nested ``Config``
    (``name``, ``category``, ``use_vector_index``) and, for some of them,
    extra fields such as ``name`` and ``link``.
    """

    content: str
    embedding: list[float] | None  # the annotation allows None
    platform: str
    document_id: UUID4
    author_id: UUID4
    author_full_name: str
    metadata: dict = Field(default_factory=dict)  # default is built by calling dict()

    @classmethod
    def to_context(cls, chunks: list["EmbeddedChunk"]) -> str:
        """Render ``chunks`` as a single text block, one entry per chunk.

        Each entry has the form::

            <blank line>
            Source: <name>
            Link: <link>
            Content: <content>
            <blank line>

        Args:
            chunks: Chunks to render, in the order they should appear.

        Returns:
            The concatenated entries, or an empty string when ``chunks`` is
            empty.
        """
        entries = []
        for chunk in chunks:
            # ``name`` and ``link`` are not declared on this base class and
            # only some subclasses add them, so read them defensively rather
            # than raising AttributeError on chunk types that lack them.
            source = getattr(chunk, "name", "N/A")
            link = getattr(chunk, "link", "N/A")
            # NOTE(review): entry layout has no per-line indentation; confirm
            # against the original literal if exact whitespace matters.
            entries.append(
                f"\nSource: {source}\nLink: {link}\nContent: {chunk.content}\n\n"
            )
        return "".join(entries)
class EmbeddedPostChunk(EmbeddedChunk):
    """Embedded chunk for post content; adds no fields to ``EmbeddedChunk``."""

    class Config:
        # Presumably consumed by the vector-document base class to choose
        # the storage collection and data category -- confirm in ``.base``.
        use_vector_index = True
        category = DataCategory.POSTS
        name = "embedded_posts"
class EmbeddedArticleChunk(EmbeddedChunk):
    """Embedded chunk for article content; adds a required ``link`` field."""

    link: str

    class Config:
        # Presumably consumed by the vector-document base class to choose
        # the storage collection and data category -- confirm in ``.base``.
        use_vector_index = True
        category = DataCategory.ARTICLES
        name = "embedded_articles"
class EmbeddedRepositoryChunk(EmbeddedChunk):
    """Embedded chunk for repository content; adds ``name`` and ``link``."""

    name: str
    link: str

    class Config:
        # Presumably consumed by the vector-document base class to choose
        # the storage collection and data category -- confirm in ``.base``.
        use_vector_index = True
        category = DataCategory.REPOSITORIES
        name = "embedded_repositories"