rag_lite / tests /test_embed.py
EL GHAFRAOUI AYOUB
C
54f5afe
"""Test RAGLite's embedding functionality."""
from pathlib import Path
import numpy as np
from raglite import RAGLiteConfig
from raglite._embed import embed_sentences
from raglite._markdown import document_to_markdown
from raglite._split_sentences import split_sentences
def test_embed(embedder: str) -> None:
    """Embed the sentences of a PDF document and validate the resulting matrix.

    The document is converted to Markdown, split into sentences, and embedded
    with normalization enabled. The embeddings must form a finite float16
    NumPy array with one row per sentence, at least 128 columns, and rows of
    (approximately) unit Euclidean norm.

    Args:
        embedder: Embedder identifier forwarded to ``RAGLiteConfig``
            (presumably supplied by a pytest fixture -- confirm in conftest).
    """
    config = RAGLiteConfig(embedder=embedder, embedder_normalize=True)

    # Einstein's special relativity paper, stored next to this test module.
    pdf_file = Path(__file__).parent / "specrel.pdf"
    markdown = document_to_markdown(pdf_file)
    sentences = split_sentences(markdown, max_len=config.chunk_max_size)
    embeddings = embed_sentences(sentences, config=config)

    # Container types, and one embedding row per sentence.
    assert isinstance(sentences, list)
    assert isinstance(embeddings, np.ndarray)
    assert len(embeddings) == len(sentences)

    # Minimum embedding width, storage dtype, and no NaN/inf entries.
    assert embeddings.shape[1] >= 128  # noqa: PLR2004
    assert embeddings.dtype == np.float16
    assert np.isfinite(embeddings).all()

    # The config requested normalization, so every row should have norm ~1.
    row_norms = np.linalg.norm(embeddings, axis=1)
    assert np.allclose(row_norms, 1.0, rtol=1e-3)