Spaces:
Runtime error
Runtime error
File size: 3,961 Bytes
acc4ffe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
"""Test functionality related to combining documents."""
from typing import Any, List, Tuple
import pytest
from langchain.chains.combine_documents.map_reduce import (
_collapse_docs,
_split_list_of_docs,
)
from langchain.docstore.document import Document
def _fake_docs_len_func(docs: List[Document]) -> int:
return len(_fake_combine_docs_func(docs)[0])
def _fake_combine_docs_func(docs: List[Document], **kwargs: Any) -> Tuple[str, dict]:
return "".join([d.page_content for d in docs]), {}
def test__split_list_long_single_doc() -> None:
"""Test splitting of a long single doc."""
docs = [Document(page_content="foo" * 100)]
with pytest.raises(ValueError):
_split_list_of_docs(docs, _fake_docs_len_func, 100)
def test__split_list_long_pair_doc() -> None:
"""Test splitting of a list with two medium docs."""
docs = [Document(page_content="foo" * 30)] * 2
with pytest.raises(ValueError):
_split_list_of_docs(docs, _fake_docs_len_func, 100)
def test__split_list_single_doc() -> None:
"""Test splitting works with just a single doc."""
docs = [Document(page_content="foo")]
doc_list = _split_list_of_docs(docs, _fake_docs_len_func, 100)
assert doc_list == [docs]
def test__split_list_double_doc() -> None:
"""Test splitting works with just two docs."""
docs = [Document(page_content="foo"), Document(page_content="bar")]
doc_list = _split_list_of_docs(docs, _fake_docs_len_func, 100)
assert doc_list == [docs]
def test__split_list_works_correctly() -> None:
"""Test splitting works correctly."""
docs = [
Document(page_content="foo"),
Document(page_content="bar"),
Document(page_content="baz"),
Document(page_content="foo" * 2),
Document(page_content="bar"),
Document(page_content="baz"),
]
doc_list = _split_list_of_docs(docs, _fake_docs_len_func, 10)
expected_result = [
# Test a group of three.
[
Document(page_content="foo"),
Document(page_content="bar"),
Document(page_content="baz"),
],
# Test a group of two, where one is bigger.
[Document(page_content="foo" * 2), Document(page_content="bar")],
# Test no errors on last
[Document(page_content="baz")],
]
assert doc_list == expected_result
def test__collapse_docs_no_metadata() -> None:
"""Test collapse documents functionality when no metadata."""
docs = [
Document(page_content="foo"),
Document(page_content="bar"),
Document(page_content="baz"),
]
output = _collapse_docs(docs, _fake_combine_docs_func)
expected_output = Document(page_content="foobarbaz")
assert output == expected_output
def test__collapse_docs_one_doc() -> None:
"""Test collapse documents functionality when only one document present."""
# Test with no metadata.
docs = [Document(page_content="foo")]
output = _collapse_docs(docs, _fake_combine_docs_func)
assert output == docs[0]
# Test with metadata.
docs = [Document(page_content="foo", metadata={"source": "a"})]
output = _collapse_docs(docs, _fake_combine_docs_func)
assert output == docs[0]
def test__collapse_docs_metadata() -> None:
"""Test collapse documents functionality when metadata exists."""
metadata1 = {"source": "a", "foo": 2, "bar": "1", "extra1": "foo"}
metadata2 = {"source": "b", "foo": "3", "bar": 2, "extra2": "bar"}
docs = [
Document(page_content="foo", metadata=metadata1),
Document(page_content="bar", metadata=metadata2),
]
output = _collapse_docs(docs, _fake_combine_docs_func)
expected_metadata = {
"source": "a, b",
"foo": "2, 3",
"bar": "1, 2",
"extra1": "foo",
"extra2": "bar",
}
expected_output = Document(page_content="foobar", metadata=expected_metadata)
assert output == expected_output
|