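"""OpenAPI tests for the points grouping endpoints.

Exercises /points/search/groups and /points/recommend/groups: grouping by a
single-value payload field, by array fields, by mixed-type fields, and the
with_lookup option that resolves group ids against a second collection.
"""
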
import pytest
import jsons
from pytest_cases import parametrize, fixture_ref
from .helpers.collection_setup import basic_collection_setup, drop_collection
from .helpers.helpers import request_with_validation
@pytest.fixture(scope='module', autouse=True)
def lookup_collection_name(collection_name) -> str:
return f"{collection_name}_lookup"
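
# lookup_params below embeds pytest_cases fixture_ref objects in request bodies;
# registering a custom jsons serializer makes those refs dump to the lookup
# collection name when the body is serialized.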
@pytest.fixture(scope='module', autouse=True)
def set_serializer(lookup_collection_name):
def custom_serializer(obj: fixture_ref, **kwargs) -> str:
return lookup_collection_name
jsons.set_serializer(custom_serializer, fixture_ref)
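
# Every `chunks` consecutive point ids share one "docId" payload value, giving
# `docs` groups of `chunks` points each for the group_by="docId" tests.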
def upsert_chunked_docs(collection_name, docs=50, chunks=5):
points = []
for doc in range(docs):
for chunk in range(chunks):
doc_id = doc
i = doc * chunks + chunk
p = {"id": i, "vector": [1.0, 0.0, 0.0, 0.0], "payload": {"docId": doc_id}}
points.append(p)
response = request_with_validation(
api="/collections/{collection_name}/points",
method="PUT",
path_params={"collection_name": collection_name},
query_params={"wait": "true"},
body={"points": points},
)
assert response.ok
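
# Points whose "multiId" payload is an array of two values; id_offset keeps
# their ids clear of the chunked-doc points above.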
def upsert_points_with_array_fields(collection_name, docs=3, chunks=5, id_offset=5000):
points = []
for doc in range(docs):
for chunk in range(chunks):
doc_ids = [f"valid_{doc}", f"valid_too_{doc}"]
i = doc * chunks + chunk + id_offset
p = {
"id": i,
"vector": [0.0, 1.0, 0.0, 0.0],
"payload": {"multiId": doc_ids},
}
points.append(p)
response = request_with_validation(
api="/collections/{collection_name}/points",
method="PUT",
path_params={"collection_name": collection_name},
query_params={"wait": "true"},
body={"points": points},
)
assert response.ok
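
# Mixed-type "heterogenousId" payloads: only strings and integers (bare or in a
# flat array) end up as group ids; the values marked invalid form no group
# (see test_groups_by_heterogenous_fields).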
def upsert_with_heterogenous_fields(collection_name):
points = [
{"id": 6000, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": "string"}}, # ok -> string
{"id": 6001, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": 123}}, # ok -> 123
{"id": 6002, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": [1, 2, 3]}}, # ok -> 1
{"id": 6003, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": ["a", "b", "c"]}}, # ok -> "a"
{"id": 6004, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": 2.42}}, # ok -> "2.42"
{"id": 6005, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": [["a", "b", "c"]]}}, # invalid
{"id": 6006, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": {"object": "string"}}}, # invalid
{"id": 6007, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": []}}, # invalid
{"id": 6008, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": None}}, # invalid
]
response = request_with_validation(
api="/collections/{collection_name}/points",
method="PUT",
path_params={"collection_name": collection_name},
query_params={"wait": "true"},
body={"points": points},
)
assert response.ok
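
# 100 points carry only "a" in "mkey" and 10 carry both "a" and "b", so grouping
# by "mkey" yields exactly two groups.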
def upsert_multi_value_payload(collection_name):
points = [
{"id": 9000 + i, "vector": [0.0, 0.0, 1.0, 1.0], "payload": {"mkey": ["a"]}}
for i in range(100)
] + [
{"id": 9100 + i, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"mkey": ["a", "b"]}}
for i in range(10)
]
response = request_with_validation(
api="/collections/{collection_name}/points",
method="PUT",
path_params={"collection_name": collection_name},
query_params={"wait": "true"},
body={"points": points},
)
assert response.ok
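
# The lookup collection stores points whose ids match the "docId" values of the
# main collection, so with_lookup can resolve each group id to a document body.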
def upsert_doc_points(collection_name, docs=50):
points = [
{"id": i, "vector": [1.0, 0.0, 0.0, 0.0], "payload": {"body": f"doc body {i}"}}
        for i in range(docs)
]
response = request_with_validation(
api="/collections/{collection_name}/points",
method="PUT",
path_params={"collection_name": collection_name},
query_params={"wait": "true"},
body={"points": points},
)
assert response.ok
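
# Build the main collection (chunked docs, array, mixed-type and multi-value
# payloads) and the lookup collection once per module, dropping both afterwards.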
@pytest.fixture(autouse=True, scope="module")
def setup(on_disk_vectors, collection_name, lookup_collection_name):
basic_collection_setup(collection_name=collection_name, on_disk_vectors=on_disk_vectors)
upsert_chunked_docs(collection_name=collection_name)
upsert_points_with_array_fields(collection_name=collection_name)
upsert_with_heterogenous_fields(collection_name=collection_name)
upsert_multi_value_payload(collection_name=collection_name)
basic_collection_setup(collection_name=lookup_collection_name, on_disk_vectors=on_disk_vectors)
upsert_doc_points(collection_name=lookup_collection_name)
yield
drop_collection(collection_name=collection_name)
drop_collection(collection_name=lookup_collection_name)
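
# Grouping by the "mkey" array yields one group per distinct value ("a" and "b").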
def test_search_with_multiple_groups(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [0.0, 0.0, 1.0, 1.0],
"limit": 2,
"with_payload": True,
"group_by": "mkey",
"group_size": 2,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 2
assert groups[0]["id"] == "a"
assert groups[1]["id"] == "b"
def test_search(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [1.0, 0.0, 0.0, 0.0],
"limit": 10,
"with_payload": True,
"group_by": "docId",
"group_size": 3,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 10
for g in groups:
assert len(g["hits"]) == 3
for h in g["hits"]:
assert h["payload"]["docId"] == g["id"]
def test_recommend(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/recommend/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"positive": [5, 10, 15],
"negative": [6, 11, 16],
"limit": 10,
"with_payload": True,
"group_by": "docId",
"group_size": 3,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 10
for g in groups:
assert len(g["hits"]) == 3
for h in g["hits"]:
assert h["payload"]["docId"] == g["id"]
def test_with_vectors(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [1.0, 0.0, 0.0, 0.0],
"limit": 5,
"with_payload": True,
"with_vector": True,
"group_by": "docId",
"group_size": 3,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 5
for g in groups:
assert len(g["hits"]) == 3
for h in g["hits"]:
assert h["payload"]["docId"] == g["id"]
assert h["vector"] == [1.0, 0.0, 0.0, 0.0]
def test_inexistent_group_by(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [1.0, 0.0, 0.0, 0.0],
"limit": 10,
"with_payload": True,
"with_vector": True,
"group_by": "inexistentDocId",
"group_size": 3,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 0
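
# "multiId" and "multiId[]" are equivalent group_by selectors: each array
# element becomes its own group, so 3 docs with two values each give 6 groups.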
def search_array_group_by(collection_name: str, group_by: str):
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [0.0, 1.0, 0.0, 0.0],
"limit": 6,
"with_payload": True,
"group_by": group_by,
"group_size": 3,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 6
group_ids = [g["id"] for g in groups]
for i in range(3):
assert f"valid_{i}" in group_ids
assert f"valid_too_{i}" in group_ids
def test_multi_value_group_by(collection_name):
search_array_group_by(collection_name, "multiId")
search_array_group_by(collection_name, "multiId[]")
def test_groups_by_heterogenous_fields(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [0.0, 0.0, 1.0, 0.0],
"limit": 10,
"with_payload": True,
"group_by": "heterogenousId",
"group_size": 3,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
group_ids = [g["id"] for g in groups]
# Expected group ids are: ['c', 3, 1, 123, 2, 'string', 'b', 'a']
assert len(groups) == 8
assert "c" in group_ids
assert 3 in group_ids
assert 1 in group_ids
assert 123 in group_ids
assert 2 in group_ids
assert "string" in group_ids
assert "b" in group_ids
assert "a" in group_ids
lookup_params = [
pytest.param(fixture_ref(lookup_collection_name), id="string name"),
pytest.param({"collection": fixture_ref(lookup_collection_name)}, id="only collection name"),
pytest.param(
{
"collection": fixture_ref(lookup_collection_name),
"with_payload": True,
"with_vectors": False,
},
id="explicit with_payload and with_vectors",
)
]
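
# By default a lookup returns the point's payload but not its vector.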
def assert_group_with_default_lookup(group, group_size=3):
assert group["hits"]
assert len(group["hits"]) == group_size
assert group["lookup"]
assert group["id"] == group["lookup"]["id"]
lookup = group["lookup"]
assert lookup["payload"]
assert not lookup.get("vector")
@parametrize("with_lookup", lookup_params, auto_refs=True)
def test_search_groups_with_lookup(collection_name, with_lookup):
with_lookup = jsons.load(jsons.dump(with_lookup))
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [1.0, 0.0, 0.0, 0.0],
"limit": 10,
"with_payload": True,
"group_by": "docId",
"group_size": 3,
"with_lookup": with_lookup,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 10
for group in groups:
assert_group_with_default_lookup(group, 3)
@parametrize("with_lookup", lookup_params)
def test_recommend_groups_with_lookup(request, collection_name, with_lookup):
#with_lookup["collection"] = str(request.getfixturevalue('lookup_collection_name'))
response = request_with_validation(
api="/collections/{collection_name}/points/recommend/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"positive": [5, 10, 15],
"negative": [6, 11, 16],
"limit": 10,
"with_payload": True,
"group_by": "docId",
"group_size": 3,
"with_lookup": jsons.dump(with_lookup),
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 10
for group in groups:
assert_group_with_default_lookup(group, 3)
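
# with_lookup can turn off payload and vector retrieval; "with_vector" is
# accepted as an alias of "with_vectors".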
@parametrize(
"with_lookup",
[
pytest.param(
{
"collection": fixture_ref(lookup_collection_name),
"with_payload": False,
"with_vectors": False,
},
id="with_payload and with_vectors",
),
pytest.param(
{
"collection": fixture_ref(lookup_collection_name),
"with_payload": False,
"with_vector": False,
},
id="with_vector is alias of with_vectors",
),
]
)
def test_search_groups_with_lookup_without_payload_nor_vectors(collection_name, with_lookup):
with_lookup = jsons.load(jsons.dump(with_lookup))
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [1.0, 0.0, 0.0, 0.0],
"limit": 10,
"with_payload": True,
"group_by": "docId",
"group_size": 3,
"with_lookup": with_lookup,
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 10
for group in groups:
assert group["hits"]
assert len(group["hits"]) == 3
assert group["lookup"]
assert group["id"] == group["lookup"]["id"]
lookup = group["lookup"]
assert not lookup.get("payload")
assert not lookup.get("vector")
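
# Pointing with_lookup at a collection that does not exist fails the whole
# request with a 404.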
def test_search_groups_lookup_with_non_existing_collection(collection_name):
non_existing_collection = "non_existing_collection"
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [1.0, 0.0, 0.0, 0.0],
"limit": 10,
"with_payload": True,
"group_by": "docId",
"group_size": 3,
"with_lookup": {
"collection": non_existing_collection,
"with_payload": True,
"with_vector": True,
},
},
)
assert response.status_code == 404
assert (
f"Collection {non_existing_collection} not found"
in response.json()["status"]["error"]
)
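
# Requesting both payload and vector in with_lookup returns both on the lookup
# record.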
def test_search_groups_with_full_lookup(collection_name, lookup_collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/search/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": [1.0, 0.0, 0.0, 0.0],
"limit": 10,
"with_payload": True,
"group_by": "docId",
"group_size": 3,
"with_lookup": {
"collection": lookup_collection_name,
"with_payload": True,
"with_vector": True,
},
},
)
assert response.ok
groups = response.json()["result"]["groups"]
assert len(groups) == 10
for group in groups:
assert group["hits"]
assert len(group["hits"]) == 3
assert group["lookup"]
assert group["id"] == group["lookup"]["id"]
lookup = group["lookup"]
assert lookup["payload"]
assert lookup["vector"]