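"""Tests for the points `search/groups` and `recommend/groups` APIs.

Covers grouping by scalar, array, and heterogeneous payload fields, and the
`with_lookup` option that joins each group with a point from a second
collection.
"""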

import pytest
import jsons
from pytest_cases import fixture, parametrize, fixture_ref

from .helpers.collection_setup import basic_collection_setup, drop_collection
from .helpers.helpers import request_with_validation


@fixture
def lookup_collection_name(collection_name) -> str:
    return f"{collection_name}_lookup"


@fixture(autouse=True)
def set_serializer(lookup_collection_name):
    # Teach `jsons` to serialize a fixture_ref as the lookup collection name,
    # so fixture refs can be embedded directly in request bodies.
    def custom_serializer(obj: fixture_ref, **kwargs) -> str:
        return lookup_collection_name

    jsons.set_serializer(custom_serializer, fixture_ref)
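
# With the serializer installed, dumping a request body that embeds a
# fixture_ref yields plain JSON, e.g. (illustrative):
#   jsons.dump({"collection": fixture_ref(lookup_collection_name)})
#   # -> {"collection": "<collection_name>_lookup"}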


def upsert_chunked_docs(collection_name, docs=50, chunks=5):
    # Each "document" is represented by `chunks` points sharing the same docId
    points = []
    for doc in range(docs):
        for chunk in range(chunks):
            doc_id = doc
            i = doc * chunks + chunk
            p = {"id": i, "vector": [1.0, 0.0, 0.0, 0.0], "payload": {"docId": doc_id}}
            points.append(p)

    response = request_with_validation(
        api="/collections/{collection_name}/points",
        method="PUT",
        path_params={"collection_name": collection_name},
        query_params={"wait": "true"},
        body={"points": points},
    )
    assert response.ok


def upsert_points_with_array_fields(collection_name, docs=3, chunks=5, id_offset=5000):
    # Points whose payload value is an array, to exercise multi-value group_by
    points = []
    for doc in range(docs):
        for chunk in range(chunks):
            doc_ids = [f"valid_{doc}", f"valid_too_{doc}"]
            i = doc * chunks + chunk + id_offset
            p = {
                "id": i,
                "vector": [0.0, 1.0, 0.0, 0.0],
                "payload": {"multiId": doc_ids},
            }
            points.append(p)

    response = request_with_validation(
        api="/collections/{collection_name}/points",
        method="PUT",
        path_params={"collection_name": collection_name},
        query_params={"wait": "true"},
        body={"points": points},
    )
    assert response.ok


def upsert_with_heterogenous_fields(collection_name):
    # Group ids must be strings or integers; arrays contribute one group per
    # element, everything else is ignored (see test_groups_by_heterogenous_fields)
    points = [
        {"id": 6000, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": "string"}},  # ok -> "string"
        {"id": 6001, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": 123}},  # ok -> 123
        {"id": 6002, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": [1, 2, 3]}},  # ok -> 1, 2, 3
        {"id": 6003, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": ["a", "b", "c"]}},  # ok -> "a", "b", "c"
        {"id": 6004, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": 2.42}},  # invalid -> floats are not valid group ids
        {"id": 6005, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": [["a", "b", "c"]]}},  # invalid
        {"id": 6006, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": {"object": "string"}}},  # invalid
        {"id": 6007, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": []}},  # invalid
        {"id": 6008, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"heterogenousId": None}},  # invalid
    ]

    response = request_with_validation(
        api="/collections/{collection_name}/points",
        method="PUT",
        path_params={"collection_name": collection_name},
        query_params={"wait": "true"},
        body={"points": points},
    )
    assert response.ok


def upsert_multi_value_payload(collection_name):
    # 100 points tagged ["a"] plus 10 points tagged ["a", "b"]
    points = [
        {"id": 9000 + i, "vector": [0.0, 0.0, 1.0, 1.0], "payload": {"mkey": ["a"]}}
        for i in range(100)
    ] + [
        {"id": 9100 + i, "vector": [0.0, 0.0, 1.0, 0.0], "payload": {"mkey": ["a", "b"]}}
        for i in range(10)
    ]

    response = request_with_validation(
        api="/collections/{collection_name}/points",
        method="PUT",
        path_params={"collection_name": collection_name},
        query_params={"wait": "true"},
        body={"points": points},
    )
    assert response.ok


def upsert_doc_points(collection_name, docs=50):
    # One point per document id; the with_lookup tests join groups against these
    points = [
        {"id": i, "vector": [1.0, 0.0, 0.0, 0.0], "payload": {"body": f"doc body {i}"}}
        for i in range(docs)
    ]

    response = request_with_validation(
        api="/collections/{collection_name}/points",
        method="PUT",
        path_params={"collection_name": collection_name},
        query_params={"wait": "true"},
        body={"points": points},
    )
    assert response.ok


@pytest.fixture(autouse=True)
def setup(on_disk_vectors, collection_name, lookup_collection_name):
    # Build both collections before each test and drop them afterwards
    basic_collection_setup(collection_name=collection_name, on_disk_vectors=on_disk_vectors)
    upsert_chunked_docs(collection_name=collection_name)
    upsert_points_with_array_fields(collection_name=collection_name)
    upsert_with_heterogenous_fields(collection_name=collection_name)
    upsert_multi_value_payload(collection_name=collection_name)

    # Secondary collection used as the with_lookup target
    basic_collection_setup(collection_name=lookup_collection_name, on_disk_vectors=on_disk_vectors)
    upsert_doc_points(collection_name=lookup_collection_name)

    yield
    drop_collection(collection_name=collection_name)
    drop_collection(collection_name=lookup_collection_name)
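

# Data layout produced by `setup` (for orientation; basic_collection_setup also
# seeds a handful of base points, overwritten where ids overlap):
#   main collection:
#     ids 0..249     -> 50 docs x 5 chunks, payload {"docId": 0..49}
#     ids 5000..5014 -> array payload {"multiId": [...]}
#     ids 6000..6008 -> heterogeneous {"heterogenousId": ...}
#     ids 9000..9109 -> multi-value {"mkey": ["a"]} and {"mkey": ["a", "b"]}
#   lookup collection:
#     ids 0..49      -> payload {"body": "doc body <i>"}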


def test_search_with_multiple_groups(collection_name):
    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [0.0, 0.0, 1.0, 1.0],
            "limit": 2,
            "with_payload": True,
            "group_by": "mkey",
            "group_size": 2,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 2
    assert groups[0]["id"] == "a"
    assert groups[1]["id"] == "b"
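

# Shape of a groups response, as exercised by the assertions in this file
# (sketch; fields not asserted here are omitted):
# {
#   "result": {
#     "groups": [
#       {
#         "id": <group_by value>,
#         "hits": [{"id": ..., "payload": {...}, "vector": ...}, ...],
#         "lookup": {...}   # present only when with_lookup is requested
#       },
#       ...
#     ]
#   }
# }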


def test_search(collection_name):
    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [1.0, 0.0, 0.0, 0.0],
            "limit": 10,
            "with_payload": True,
            "group_by": "docId",
            "group_size": 3,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 10
    for g in groups:
        assert len(g["hits"]) == 3
        for h in g["hits"]:
            assert h["payload"]["docId"] == g["id"]


def test_recommend(collection_name):
    response = request_with_validation(
        api="/collections/{collection_name}/points/recommend/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "positive": [5, 10, 15],
            "negative": [6, 11, 16],
            "limit": 10,
            "with_payload": True,
            "group_by": "docId",
            "group_size": 3,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 10
    for g in groups:
        assert len(g["hits"]) == 3
        for h in g["hits"]:
            assert h["payload"]["docId"] == g["id"]


def test_with_vectors(collection_name):
    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [1.0, 0.0, 0.0, 0.0],
            "limit": 5,
            "with_payload": True,
            "with_vector": True,
            "group_by": "docId",
            "group_size": 3,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 5
    for g in groups:
        assert len(g["hits"]) == 3
        for h in g["hits"]:
            assert h["payload"]["docId"] == g["id"]
            assert h["vector"] == [1.0, 0.0, 0.0, 0.0]


def test_inexistent_group_by(collection_name):
    # Grouping by a payload key that no point has yields no groups
    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [1.0, 0.0, 0.0, 0.0],
            "limit": 10,
            "with_payload": True,
            "with_vector": True,
            "group_by": "inexistentDocId",
            "group_size": 3,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 0


def search_array_group_by(collection_name: str, group_by: str):
    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [0.0, 1.0, 0.0, 0.0],
            "limit": 6,
            "with_payload": True,
            "group_by": group_by,
            "group_size": 3,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 6

    # Each array element becomes its own group
    group_ids = [g["id"] for g in groups]
    for i in range(3):
        assert f"valid_{i}" in group_ids
        assert f"valid_too_{i}" in group_ids
def test_multi_value_group_by(collection_name):
    search_array_group_by(collection_name, "multiId")
    search_array_group_by(collection_name, "multiId[]")


def test_groups_by_heterogenous_fields(collection_name):
    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [0.0, 0.0, 1.0, 0.0],
            "limit": 10,
            "with_payload": True,
            "group_by": "heterogenousId",
            "group_size": 3,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    group_ids = [g["id"] for g in groups]

    # Only 8 of the upserted values form valid group ids (the limit is 10):
    # ['c', 3, 1, 123, 2, 'string', 'b', 'a']
    assert len(groups) == 8
    assert "c" in group_ids
    assert 3 in group_ids
    assert 1 in group_ids
    assert 123 in group_ids
    assert 2 in group_ids
    assert "string" in group_ids
    assert "b" in group_ids
    assert "a" in group_ids


lookup_params = [
    pytest.param(fixture_ref(lookup_collection_name), id="string name"),
    pytest.param({"collection": fixture_ref(lookup_collection_name)}, id="only collection name"),
    pytest.param(
        {
            "collection": fixture_ref(lookup_collection_name),
            "with_payload": True,
            "with_vectors": False,
        },
        id="explicit with_payload and with_vectors",
    ),
]
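

# `with_lookup` accepts either a bare collection name or an object. When only
# the collection is given, the defaults asserted below appear to be: payload
# included, vectors excluded.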


def assert_group_with_default_lookup(group, group_size=3):
    assert group["hits"]
    assert len(group["hits"]) == group_size
    assert group["lookup"]
    # The lookup point is the one whose id equals the group id
    assert group["id"] == group["lookup"]["id"]

    lookup = group["lookup"]
    # Default lookup: payload is returned, vectors are not
    assert lookup["payload"]
    assert not lookup.get("vector")


@parametrize("with_lookup", lookup_params)
def test_search_groups_with_lookup(collection_name, with_lookup):
    # Round-trip through jsons so embedded fixture_refs become plain names
    with_lookup = jsons.load(jsons.dump(with_lookup))

    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [1.0, 0.0, 0.0, 0.0],
            "limit": 10,
            "with_payload": True,
            "group_by": "docId",
            "group_size": 3,
            "with_lookup": with_lookup,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 10
    for group in groups:
        assert_group_with_default_lookup(group, 3)


@parametrize("with_lookup", lookup_params)
def test_recommend_groups_with_lookup(collection_name, with_lookup):
    response = request_with_validation(
        api="/collections/{collection_name}/points/recommend/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "positive": [5, 10, 15],
            "negative": [6, 11, 16],
            "limit": 10,
            "with_payload": True,
            "group_by": "docId",
            "group_size": 3,
            "with_lookup": jsons.dump(with_lookup),
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 10
    for group in groups:
        assert_group_with_default_lookup(group, 3)


# Parametrization reconstructed from the assertions below (assumption: the
# lookup is requested with payload and vectors explicitly disabled).
no_payload_lookup_params = [
    pytest.param(
        {
            "collection": fixture_ref(lookup_collection_name),
            "with_payload": False,
            "with_vectors": False,
        },
        id="without payload nor vectors",
    ),
]


@parametrize("with_lookup", no_payload_lookup_params)
def test_search_groups_with_lookup_without_payload_nor_vectors(collection_name, with_lookup):
    with_lookup = jsons.load(jsons.dump(with_lookup))

    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [1.0, 0.0, 0.0, 0.0],
            "limit": 10,
            "with_payload": True,
            "group_by": "docId",
            "group_size": 3,
            "with_lookup": with_lookup,
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 10
    for group in groups:
        assert group["hits"]
        assert len(group["hits"]) == 3
        assert group["lookup"]
        assert group["id"] == group["lookup"]["id"]

        lookup = group["lookup"]
        assert not lookup.get("payload")
        assert not lookup.get("vector")


def test_search_groups_lookup_with_non_existing_collection(collection_name):
    non_existing_collection = "non_existing_collection"

    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [1.0, 0.0, 0.0, 0.0],
            "limit": 10,
            "with_payload": True,
            "group_by": "docId",
            "group_size": 3,
            "with_lookup": {
                "collection": non_existing_collection,
                "with_payload": True,
                "with_vector": True,
            },
        },
    )
    assert response.status_code == 404
    assert (
        f"Collection {non_existing_collection} not found"
        in response.json()["status"]["error"]
    )


def test_search_groups_with_full_lookup(collection_name, lookup_collection_name):
    response = request_with_validation(
        api="/collections/{collection_name}/points/search/groups",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": [1.0, 0.0, 0.0, 0.0],
            "limit": 10,
            "with_payload": True,
            "group_by": "docId",
            "group_size": 3,
            "with_lookup": {
                "collection": lookup_collection_name,
                "with_payload": True,
                "with_vector": True,
            },
        },
    )
    assert response.ok

    groups = response.json()["result"]["groups"]
    assert len(groups) == 10
    for group in groups:
        assert group["hits"]
        assert len(group["hits"]) == 3
        assert group["lookup"]
        assert group["id"] == group["lookup"]["id"]

        lookup = group["lookup"]
        assert lookup["payload"]
        assert lookup["vector"]