File size: 1,131 Bytes
54f5afe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
"""Test RAGLite's embedding functionality."""

from pathlib import Path

import numpy as np

from raglite import RAGLiteConfig
from raglite._embed import embed_sentences
from raglite._markdown import document_to_markdown
from raglite._split_sentences import split_sentences


def test_embed(embedder: str) -> None:
    """Embed the sentences of a PDF document and validate the resulting matrix.

    The document is converted to markdown, split into sentences no longer than
    the config's ``chunk_max_size``, and embedded with normalization enabled.
    The test then checks the container types, that there is one embedding row
    per sentence, the minimum embedding width, the float16 dtype, that every
    value is finite, and that each row has (approximately) unit L2 norm.

    Args:
        embedder: Embedder identifier forwarded to ``RAGLiteConfig``.
    """
    config = RAGLiteConfig(embedder=embedder, embedder_normalize=True)

    # Einstein's special relativity paper, located next to this test module.
    pdf_path = Path(__file__).parent / "specrel.pdf"
    markdown = document_to_markdown(pdf_path)
    sentences = split_sentences(markdown, max_len=config.chunk_max_size)
    embeddings = embed_sentences(sentences, config=config)

    # Container types, and one embedding row per input sentence.
    assert isinstance(sentences, list)
    assert isinstance(embeddings, np.ndarray)
    assert len(sentences) == len(embeddings)

    # Embedding width and storage dtype.
    embedding_dim = embeddings.shape[1]
    assert embedding_dim >= 128  # noqa: PLR2004
    assert embeddings.dtype == np.float16

    # No NaN/inf values, and rows are unit-length because normalization was requested.
    finite_mask = np.isfinite(embeddings)
    assert np.all(finite_mask)
    row_norms = np.linalg.norm(embeddings, axis=1)
    assert np.allclose(row_norms, 1.0, rtol=1e-3)