Spaces:
Build error
Build error
import uuid | |
from math import isclose | |
import pytest | |
from .helpers.collection_setup import drop_collection, full_collection_setup | |
from .helpers.helpers import reciprocal_rank_fusion, request_with_validation | |
uuid_1 = str(uuid.uuid4()) | |
uuid_2 = str(uuid.uuid4()) | |
uuid_3 = str(uuid.uuid4()) | |
def lookup_collection_name(collection_name): | |
return f"{collection_name}_lookup" | |
def setup(on_disk_vectors, collection_name): | |
full_collection_setup(collection_name=collection_name, on_disk_vectors=on_disk_vectors) | |
# keyword index on `city` | |
response = request_with_validation( | |
api="/collections/{collection_name}/index", | |
method="PUT", | |
query_params={'wait': 'true'}, | |
path_params={"collection_name": collection_name}, | |
body={"field_name": "city", "field_schema": "keyword"}, | |
) | |
assert response.ok, response.text | |
# integer index on count | |
response = request_with_validation( | |
api="/collections/{collection_name}/index", | |
method="PUT", | |
query_params={'wait': 'true'}, | |
path_params={"collection_name": collection_name}, | |
body={"field_name": "count", "field_schema": "integer"}, | |
) | |
assert response.ok, response.text | |
# UUID index | |
def set_payload(payload, points): | |
response = request_with_validation( | |
api='/collections/{collection_name}/points/payload', | |
method="POST", | |
path_params={'collection_name': collection_name}, | |
query_params={'wait': 'true'}, | |
body={ | |
"payload": payload, | |
"points": points | |
} | |
) | |
assert response.ok | |
# create payload | |
set_payload({"uuid": uuid_1}, [1]) | |
set_payload({"uuid": uuid_2}, [2]) | |
set_payload({"uuid": uuid_3}, [3]) | |
# Create index | |
response = request_with_validation( | |
api='/collections/{collection_name}/index', | |
method="PUT", | |
path_params={'collection_name': collection_name}, | |
query_params={'wait': 'true'}, | |
body={ | |
"field_name": "uuid", | |
"field_schema": "uuid" | |
} | |
) | |
assert response.ok | |
yield | |
drop_collection(collection_name=collection_name) | |
def root_and_rescored_query(collection_name, query, using, filter=None, limit=None, with_payload=None): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": query, | |
"limit": limit, | |
"filter": filter, | |
"with_payload": with_payload, | |
"using": using, | |
}, | |
) | |
assert response.ok, response.text | |
root_query_result = response.json()["result"]["points"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": { | |
"limit": 1000, | |
}, | |
"query": query, | |
"filter": filter, | |
"with_payload": with_payload, | |
"using": using, | |
}, | |
) | |
assert response.ok, response.text | |
nested_query_result = response.json()["result"]["points"] | |
assert root_query_result == nested_query_result | |
return root_query_result | |
def test_query_validation(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"vector": { | |
"name": "dense-image", | |
"vector": [0.1, 0.2, 0.3, 0.4], | |
}, | |
"using": "dense-image", | |
"query": {"fusion": "rrf"}, | |
"limit": 10, | |
}, | |
) | |
assert not response.ok, response.text | |
assert response.json()["status"]["error"] == "Bad request: Fusion queries cannot be combined with the 'using' field." | |
def test_query_by_vector(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/search", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"vector": { | |
"name": "dense-image", | |
"vector": [0.1, 0.2, 0.3, 0.4], | |
}, | |
"limit": 10, | |
}, | |
) | |
assert response.ok, response.text | |
search_result = response.json()["result"] | |
default_query_result = root_and_rescored_query(collection_name, [0.1, 0.2, 0.3, 0.4], "dense-image") | |
nearest_query_result = root_and_rescored_query(collection_name, {"nearest": [0.1, 0.2, 0.3, 0.4]}, "dense-image") | |
assert search_result == default_query_result | |
assert search_result == nearest_query_result | |
def test_query_by_id(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": 2, | |
"using": "dense-image", | |
}, | |
) | |
assert response.ok, response.text | |
by_id_query_result = response.json()["result"]["points"] | |
top = by_id_query_result[0] | |
assert top["id"] != 2 # id 2 is excluded from the results | |
def test_filtered_query(collection_name): | |
filters = [ | |
{ | |
"must": [ | |
{ | |
"key": "city", | |
"match": { | |
"value": "Berlin" | |
} | |
} | |
] | |
}, | |
{ | |
"must_not": [ | |
{ | |
"key": "city", | |
"match": { | |
"value": "Berlin" | |
} | |
} | |
] | |
}, | |
{ | |
"should": [ | |
{ | |
"key": "city", | |
"match": { | |
"value": "Berlin" | |
} | |
} | |
] | |
}, | |
{ | |
"min_should": { | |
"conditions": [ | |
{ | |
"key": "city", | |
"match": { | |
"any": ["Berlin", "Moscow"] | |
} | |
}, | |
{ | |
"key": "count", | |
"match": { | |
"value": 0 | |
} | |
} | |
], | |
"min_count": 2 | |
} | |
} | |
] | |
for filter in filters: | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/search", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"vector": { | |
"name": "dense-image", | |
"vector": [0.1, 0.2, 0.3, 0.4], | |
}, | |
"filter": filter, | |
"limit": 10, | |
}, | |
) | |
assert response.ok, response.text | |
search_result = response.json()["result"] | |
default_query_result = root_and_rescored_query(collection_name, [0.1, 0.2, 0.3, 0.4], "dense-image", filter) | |
nearest_query_result = root_and_rescored_query(collection_name, {"nearest": [0.1, 0.2, 0.3, 0.4]}, "dense-image", filter) | |
assert search_result == default_query_result | |
assert search_result == nearest_query_result | |
def test_uuid_index_filtered_query(collection_name): | |
filters_arr = get_uuid_index_filters() | |
for item in filters_arr: | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/search", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"vector": {"vector": [0.1, 0.2, 0.3, 0.4], "name": "dense-image"}, | |
"filter": item, | |
"limit": 10, | |
}, | |
) | |
assert response.ok, f"{response.text}\n{item}" | |
search_result = response.json()["result"] | |
default_query_result = root_and_rescored_query(collection_name, [0.1, 0.2, 0.3, 0.4], "dense-image", filter=item) | |
nearest_query_result = root_and_rescored_query(collection_name, {"nearest": [0.1, 0.2, 0.3, 0.4]}, "dense-image", filter=item) | |
assert search_result == default_query_result | |
assert search_result == nearest_query_result | |
def test_scroll(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/scroll", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={}, | |
) | |
assert response.ok, response.text | |
scroll_result = response.json()["result"]["points"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"with_payload": True, | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"]["points"] | |
for record, scored_point in zip(scroll_result, query_result): | |
assert record.get("id") == scored_point.get("id") | |
assert record.get("payload") == scored_point.get("payload") | |
def test_filtered_scroll(collection_name): | |
filter = { | |
"must": [ | |
{ | |
"key": "city", | |
"match": { | |
"value": "Berlin" | |
} | |
} | |
] | |
} | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/scroll", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"filter": filter | |
}, | |
) | |
assert response.ok, response.text | |
scroll_result = response.json()["result"]["points"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"with_payload": True, | |
"filter": filter | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"]["points"] | |
for record, scored_point in zip(scroll_result, query_result): | |
assert record.get("id") == scored_point.get("id") | |
assert record.get("payload") == scored_point.get("payload") | |
def get_uuid_index_filters(): | |
# Check different filters | |
filters_arr = [] | |
match_conditions = [ | |
{"value": uuid_1}, | |
{"text": uuid_2}, | |
{"any": [uuid_1, uuid_2]}, | |
{"except": [uuid_1, uuid_2]} | |
] | |
for item in ["must", "must_not", "should"]: | |
for condition in match_conditions: | |
filters_arr.append( | |
{ | |
item: [ | |
{ | |
"key": "uuid", | |
"match": condition | |
} | |
] | |
} | |
) | |
# min_should | |
for condition in match_conditions: | |
filters_arr.append( | |
{ | |
"min_should": { | |
"conditions": [ | |
{ | |
"key": "uuid", | |
"match": condition | |
} | |
], | |
"min_count": 2 | |
} | |
} | |
) | |
return filters_arr | |
def test_recommend_avg(query_filter, collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/recommend", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"positive": [1, 2, 3, 4], | |
"negative": [3], | |
"limit": 10, | |
"using": "dense-image", | |
"filter": query_filter | |
}, | |
) | |
assert response.ok, response.text | |
recommend_result = response.json()["result"] | |
query_result = root_and_rescored_query(collection_name, | |
{ | |
"recommend": {"positive": [1, 2, 3, 4], "negative": [3]}, | |
}, | |
"dense-image", | |
filter=query_filter | |
) | |
assert recommend_result == query_result | |
def test_recommend_lookup_validations(collection_name, lookup_collection_name): | |
# delete lookup collection if exists | |
response = request_with_validation( | |
api='/collections/{collection_name}', | |
method="DELETE", | |
path_params={'collection_name': lookup_collection_name}, | |
) | |
assert response.ok, response.text | |
# re-create lookup collection | |
response = request_with_validation( | |
api='/collections/{collection_name}', | |
method="PUT", | |
path_params={'collection_name': lookup_collection_name}, | |
body={ | |
"vectors": { | |
"other": { | |
"size": 4, | |
"distance": "Dot", | |
} | |
} | |
} | |
) | |
assert response.ok, response.text | |
# insert vectors to lookup collection | |
response = request_with_validation( | |
api='/collections/{collection_name}/points', | |
method="PUT", | |
path_params={'collection_name': lookup_collection_name}, | |
query_params={'wait': 'true'}, | |
body={ | |
"points": [ | |
{ | |
"id": 1, | |
"vector": {"other": [1.0, 0.0, 0.0, 0.0]}, | |
}, | |
{ | |
"id": 2, | |
"vector": {"other": [0.0, 0.0, 0.0, 2.0]}, | |
}, | |
] | |
} | |
) | |
assert response.ok, response.text | |
# check query + lookup_from non-existing id | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2, 3], | |
}, | |
}, | |
"limit": 10, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "other" | |
} | |
}, | |
) | |
assert not response.ok, response.text | |
assert response.json()["status"]["error"] == "Not found: No point with id 3 found" | |
# check query + lookup_from non-existing collection | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2], | |
}, | |
}, | |
"limit": 10, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": "non-existing-collection", | |
"vector": "other" | |
} | |
}, | |
) | |
assert not response.ok, response.text | |
assert response.json()["status"]["error"] == "Not found: Collection non-existing-collection not found" | |
# check query + lookup_from non-existing vector | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2], | |
}, | |
}, | |
"limit": 10, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "non-existing-vector" | |
} | |
}, | |
) | |
assert not response.ok, response.text | |
assert response.json()["status"]["error"] == "Wrong input: Not existing vector name error: non-existing-vector" | |
# check nested query + lookup_from non-existing id | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2, 3], | |
}, | |
}, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "other" | |
} | |
} | |
], | |
"limit": 10, | |
"using": "dense-image", | |
"query": {"fusion": "rrf"} | |
}, | |
) | |
assert not response.ok, response.text | |
assert response.json()["status"]["error"] == "Not found: No point with id 3 found" | |
# check nested query + lookup_from non-existing collection | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2], | |
}, | |
}, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": "non-existing-collection", | |
"vector": "other" | |
} | |
} | |
], | |
"limit": 10, | |
"using": "dense-image", | |
"query": {"fusion": "rrf"} | |
}, | |
) | |
assert not response.ok, response.text | |
assert response.json()["status"]["error"] == "Not found: Collection non-existing-collection not found" | |
# check nested query + lookup_from non-existing vector | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2], | |
}, | |
}, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "non-existing-vector" | |
} | |
} | |
], | |
"limit": 10, | |
"using": "dense-image", | |
"query": {"fusion": "rrf"} | |
}, | |
) | |
assert not response.ok, response.text | |
assert response.json()["status"]["error"] == "Wrong input: Not existing vector name error: non-existing-vector" | |
def test_recommend_lookup(collection_name, lookup_collection_name): | |
# delete lookup collection if exists | |
response = request_with_validation( | |
api='/collections/{collection_name}', | |
method="DELETE", | |
path_params={'collection_name': lookup_collection_name}, | |
) | |
assert response.ok, response.text | |
# re-create lookup collection | |
response = request_with_validation( | |
api='/collections/{collection_name}', | |
method="PUT", | |
path_params={'collection_name': lookup_collection_name}, | |
body={ | |
"vectors": { | |
"other": { | |
"size": 4, | |
"distance": "Dot", | |
} | |
} | |
} | |
) | |
assert response.ok, response.text | |
# insert vectors to lookup collection | |
response = request_with_validation( | |
api='/collections/{collection_name}/points', | |
method="PUT", | |
path_params={'collection_name': lookup_collection_name}, | |
query_params={'wait': 'true'}, | |
body={ | |
"points": [ | |
{ | |
"id": 1, | |
"vector": {"other": [1.0, 0.0, 0.0, 0.0]}, | |
}, | |
{ | |
"id": 2, | |
"vector": {"other": [0.0, 0.0, 0.0, 2.0]}, | |
}, | |
] | |
} | |
) | |
assert response.ok, response.text | |
# check recommend + lookup_from | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/recommend", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"positive": [1], | |
"negative": [2], | |
"limit": 10, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "other" | |
} | |
}, | |
) | |
assert response.ok, response.text | |
recommend_result = response.json()["result"] | |
# check query + lookup_from | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2], | |
}, | |
}, | |
"limit": 10, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "other" | |
} | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"]["points"] | |
# check equivalence recommend vs query | |
assert recommend_result == query_result, f"{recommend_result} != {query_result}" | |
# check nested query id + lookup_from | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2], | |
}, | |
}, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "other" | |
} | |
} | |
], | |
"query": {"fusion": "rrf"} | |
}, | |
) | |
assert response.ok, response.text | |
nested_query_result_id = response.json()["result"]["points"] | |
# check nested query vector + lookup_from | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": { | |
"recommend": { | |
"positive": [[1.0, 0.0, 0.0, 0.0]], | |
"negative": [[0.0, 0.0, 0.0, 2.0]], | |
}, | |
}, | |
"using": "dense-image", | |
} | |
], | |
"query": {"fusion": "rrf"} | |
}, | |
) | |
assert response.ok, response.text | |
nested_query_result_vector = response.json()["result"]["points"] | |
# check equivalence nested query id vs nested query vector | |
assert nested_query_result_id == nested_query_result_vector, f"{nested_query_result_id} != {nested_query_result_vector}" | |
def test_recommend_best_score(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/recommend", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"positive": [1, 2, 3, 4], | |
"negative": [3], | |
"limit": 10, | |
"strategy": "best_score", | |
"using": "dense-image", | |
}, | |
) | |
assert response.ok, response.text | |
recommend_result = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"recommend": { | |
"positive": [1, 2, 3, 4], | |
"negative": [3], | |
"strategy": "best_score", | |
}, | |
}, | |
"using": "dense-image", | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"]["points"] | |
assert recommend_result == query_result | |
def test_discover(query_filter, collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/discover", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"target": 2, | |
"context": [{"positive": 3, "negative": 4}], | |
"limit": 10, | |
"using": "dense-image", | |
"filter": query_filter | |
}, | |
) | |
assert response.ok, response.text | |
discover_result = response.json()["result"] | |
query_result = root_and_rescored_query(collection_name, | |
{ | |
"discover": { | |
"target": 2, | |
"context": [{"positive": 3, "negative": 4}], | |
} | |
}, | |
"dense-image", | |
filter=query_filter | |
) | |
assert discover_result == query_result | |
def test_context(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/discover", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"context": [{"positive": 2, "negative": 4}], | |
"limit": 100, | |
"using": "dense-image", | |
}, | |
) | |
assert response.ok, response.text | |
context_result = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"context": [{"positive": 2, "negative": 4}], | |
}, | |
"limit": 100, | |
"using": "dense-image", | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"]["points"] | |
assert set([p["id"] for p in context_result]) == set([p["id"] for p in query_result]) | |
def test_order_by(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/scroll", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"order_by": "count", | |
}, | |
) | |
assert response.ok, response.text | |
scroll_result = response.json()["result"]["points"] | |
query_result = root_and_rescored_query(collection_name, {"order_by": "count"}, "dense-image", with_payload=True) | |
for record, scored_point in zip(scroll_result, query_result): | |
assert record.get("id") == scored_point.get("id") | |
assert record.get("payload") == scored_point.get("payload") | |
def test_rrf(collection_name): | |
filter = { | |
"must": [ | |
{ | |
"key": "city", | |
"match": { | |
"value": "Berlin" | |
} | |
} | |
] | |
} | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/search", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"vector": { | |
"name": "dense-image", | |
"vector": [0.1, 0.2, 0.3, 0.4] | |
}, | |
"filter": filter, | |
"limit": 10, | |
}, | |
) | |
assert response.ok, response.text | |
search_result_1 = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/search", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"vector": { | |
"name": "dense-text", | |
"vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] | |
}, | |
"filter": filter, | |
"limit": 10, | |
}, | |
) | |
assert response.ok, response.text | |
search_result_2 = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/search", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"vector": { | |
"name": "sparse-text", | |
"vector": { | |
"indices": [63, 65, 66], | |
"values": [1, 2.2, 3.3], | |
} | |
}, | |
"filter": filter, | |
"limit": 10, | |
}, | |
) | |
assert response.ok, response.text | |
search_result_3 = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/search", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"vector": { | |
"name": "dense-multi", | |
"vector": [3.05, 3.61, 3.76, 3.74], # legacy API expands single vector to multiple vectors | |
}, | |
"filter": filter, | |
"limit": 10, | |
}, | |
) | |
assert response.ok, response.text | |
search_result_4 = response.json()["result"] | |
rrf_expected = reciprocal_rank_fusion([search_result_1, search_result_2, search_result_3, search_result_4], limit=10) | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": [0.1, 0.2, 0.3, 0.4], | |
"using": "dense-image" | |
}, | |
{ | |
"query": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], | |
"using": "dense-text" | |
}, | |
{ | |
"query": { | |
"indices": [63, 65, 66], | |
"values": [1, 2.2, 3.3], | |
}, | |
"using": "sparse-text", | |
}, | |
{ | |
"query": [[3.05, 3.61, 3.76, 3.74]], | |
"using": "dense-multi" | |
}, | |
], | |
"filter": filter, | |
"limit": 10, | |
"query": {"fusion": "rrf"} | |
}, | |
) | |
assert response.ok, response.json() | |
rrf_result = response.json()["result"]["points"] | |
def get_id(x): | |
return x["id"] | |
# rrf order is not deterministic with same scores, so we need to sort by id | |
for expected, result in zip(sorted(rrf_expected, key=get_id), sorted(rrf_result, key=get_id)): | |
assert expected["id"] == result["id"] | |
assert expected.get("payload") == result.get("payload") | |
assert isclose(expected["score"], result["score"], rel_tol=1e-5) | |
# test with inner filters instead of root filter | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": [0.1, 0.2, 0.3, 0.4], | |
"using": "dense-image", | |
"filter": filter, | |
}, | |
{ | |
"query": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], | |
"using": "dense-text", | |
"filter": filter, | |
}, | |
{ | |
"query": { | |
"indices": [63, 65, 66], | |
"values": [1, 2.2, 3.3], | |
}, | |
"using": "sparse-text", | |
"filter": filter, | |
}, | |
{ | |
"query": [[3.05, 3.61, 3.76, 3.74]], | |
"using": "dense-multi", | |
"filter": filter, | |
}, | |
], | |
"limit": 10, | |
"query": {"fusion": "rrf"} | |
}, | |
) | |
assert response.ok, response.json() | |
rrf_inner_filter_result = response.json()["result"]["points"] | |
for expected, result in zip(sorted(rrf_expected, key=get_id), sorted(rrf_inner_filter_result, key=get_id)): | |
assert expected["id"] == result["id"] | |
assert expected.get("payload") == result.get("payload") | |
assert isclose(expected["score"], result["score"], rel_tol=1e-5) | |
def test_sparse_dense_rerank_colbert(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": [0.1, 0.2, 0.3, 0.4], | |
"using": "dense-image" | |
}, | |
{ | |
"query": { | |
"indices": [63, 65, 66], | |
"values": [1, 2.2, 3.3], | |
}, | |
"using": "sparse-text", | |
} | |
], | |
"limit": 3, | |
"query": [[3.05, 3.61, 3.76, 3.74]], | |
"using": "dense-multi" | |
}, | |
) | |
assert response.ok, response.json() | |
rerank_result = response.json()["result"]["points"] | |
assert len(rerank_result) == 3 | |
# record current result to detect change | |
assert rerank_result[0]["id"] == 5 | |
assert rerank_result[1]["id"] == 2 | |
assert rerank_result[2]["id"] == 1 | |
def test_nearest_query_group(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [], | |
"limit": 3, | |
"query": [-1.9, 1.1, -1.1, 1.1], | |
"using": "dense-image", | |
"with_payload": True, | |
"group_by": "city", | |
"group_size": 2, | |
}, | |
) | |
groups = response.json()["result"]["groups"] | |
# found 3 groups has requested with `limit` | |
assert len(groups) == 3 | |
# group 1 | |
assert groups[0]["id"] == "Berlin" | |
assert len(groups[0]["hits"]) == 2 # group_size | |
assert groups[0]["hits"][0]["id"] == 1 | |
assert groups[0]["hits"][0]["payload"]["city"] == "Berlin" | |
assert groups[0]["hits"][1]["id"] == 3 | |
assert groups[0]["hits"][1]["payload"]["city"] == ["Berlin", "Moscow"] | |
# group 2 | |
assert groups[1]["id"] == "Moscow" | |
assert len(groups[1]["hits"]) == 2 # group_size | |
assert groups[1]["hits"][0]["id"] == 3 | |
assert groups[1]["hits"][0]["payload"]["city"] == ["Berlin", "Moscow"] | |
assert groups[1]["hits"][1]["id"] == 4 | |
assert groups[1]["hits"][1]["payload"]["city"] == ["London", "Moscow"] | |
# group 3 | |
assert groups[2]["id"] == "London" | |
assert len(groups[2]["hits"]) == 2 # group_size | |
assert groups[2]["hits"][0]["id"] == 2 | |
assert groups[2]["hits"][0]["payload"]["city"] == ["Berlin", "London"] | |
assert groups[2]["hits"][1]["id"] == 4 | |
assert groups[2]["hits"][1]["payload"]["city"] == ["London", "Moscow"] | |
def test_recommend_group(strategy, collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/recommend/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"positive": [1, 2, 3, 4], | |
"negative": [3], | |
"limit": 10, | |
"using": "dense-image", | |
"with_payload": True, | |
"strategy": strategy, | |
"group_by": "city", | |
"group_size": 2, | |
}, | |
) | |
assert response.ok, response.text | |
recommend_result = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"recommend": { | |
"positive": [1, 2, 3, 4], | |
"negative": [3], | |
}, | |
}, | |
"limit": 10, | |
"using": "dense-image", | |
"with_payload": True, | |
"strategy": strategy, | |
"group_by": "city", | |
"group_size": 2, | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"] | |
assert recommend_result == query_result, f"{recommend_result} != {query_result}" | |
def test_order_by_group(direction, collection_name): | |
# will check equivalence of scroll and query result with order_by group | |
# where query uses a single result per group | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/scroll", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"order_by": { | |
"key": "count", | |
"direction": direction, | |
}, | |
"limit": 50, | |
}, | |
) | |
assert response.ok, response.text | |
scroll_result = response.json()["result"]["points"] | |
# keep only first result per payload value | |
seen_payloads = set() | |
filtered_scroll_result = [] | |
for record in scroll_result: | |
if record["payload"]["count"] in seen_payloads: | |
continue | |
else: | |
seen_payloads.add(record["payload"]["count"]) | |
filtered_scroll_result.append(record) | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"order_by": { | |
"key": "count", | |
"direction": direction, | |
} | |
}, | |
"limit": 10, | |
"using": "dense-image", | |
"with_payload": True, | |
"group_by": "count", | |
"group_size": 1, | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"] | |
# flatten group result to match scroll result | |
flatten_query_result = [] | |
for group in query_result["groups"]: | |
flatten_query_result.extend(group["hits"]) | |
for record, scored_point in zip(filtered_scroll_result, flatten_query_result): | |
assert record.get("id") == scored_point.get("id") | |
assert record.get("payload") == scored_point.get("payload") | |
def test_discover_group(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [], | |
"limit": 2, | |
"query": { | |
"discover": { | |
"target": 5, | |
"context": [{"positive": 3, "negative": 4}], | |
} | |
}, | |
"using": "dense-image", | |
"with_payload": True, | |
"group_by": "city", | |
"group_size": 2, | |
}, | |
) | |
assert response.ok, response.text | |
groups = response.json()["result"]["groups"] | |
# found 2 groups has requested with `limit` | |
assert len(groups) == 2 | |
# group 1 | |
assert groups[0]["id"] == "Berlin" | |
assert len(groups[0]["hits"]) == 2 # group_size | |
assert groups[0]["hits"][0]["id"] == 1 | |
assert groups[0]["hits"][0]["payload"]["city"] == "Berlin" | |
assert groups[0]["hits"][1]["id"] == 2 | |
assert groups[0]["hits"][1]["payload"]["city"] == ["Berlin", "London"] | |
# group 2 | |
assert groups[1]["id"] == "London" | |
assert len(groups[1]["hits"]) == 1 | |
assert groups[1]["hits"][0]["id"] == 2 | |
assert groups[1]["hits"][0]["payload"]["city"] == ["Berlin", "London"] | |
def test_recommend_lookup_group(collection_name, lookup_collection_name): | |
# delete lookup collection if exists | |
response = request_with_validation( | |
api='/collections/{collection_name}', | |
method="DELETE", | |
path_params={'collection_name': lookup_collection_name}, | |
) | |
assert response.ok, response.text | |
# re-create lookup collection | |
response = request_with_validation( | |
api='/collections/{collection_name}', | |
method="PUT", | |
path_params={'collection_name': lookup_collection_name}, | |
body={ | |
"vectors": { | |
"other": { | |
"size": 4, | |
"distance": "Dot", | |
} | |
} | |
} | |
) | |
assert response.ok, response.text | |
# insert vectors to lookup collection | |
response = request_with_validation( | |
api='/collections/{collection_name}/points', | |
method="PUT", | |
path_params={'collection_name': lookup_collection_name}, | |
query_params={'wait': 'true'}, | |
body={ | |
"points": [ | |
{ | |
"id": 1, | |
"vector": {"other": [10.0, 10.0, 10.0, 10.0]}, | |
}, | |
{ | |
"id": 2, | |
"vector": {"other": [20.0, 0.0, 0.0, 0.0]}, | |
}, | |
] | |
} | |
) | |
assert response.ok, response.text | |
# check recommend group + lookup_from | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/recommend/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"positive": [1], | |
"negative": [2], | |
"limit": 10, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "other" | |
}, | |
"group_by": "city", | |
"group_size": 2, | |
}, | |
) | |
assert response.ok, response.text | |
recommend_result = response.json()["result"] | |
# check query + lookup_from | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2], | |
}, | |
}, | |
"limit": 10, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "other" | |
}, | |
"group_by": "city", | |
"group_size": 2, | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"] | |
# check equivalence recommend vs query | |
assert recommend_result == query_result, f"{recommend_result} != {query_result}" | |
# check nested query id + lookup_from | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": { | |
"recommend": { | |
"positive": [1], | |
"negative": [2], | |
}, | |
}, | |
"using": "dense-image", | |
"lookup_from": { | |
"collection": lookup_collection_name, | |
"vector": "other" | |
}, | |
} | |
], | |
"group_by": "city", | |
"group_size": 2, | |
"query": {"fusion": "rrf"}, | |
}, | |
) | |
assert response.ok, response.text | |
nested_query_result_id = response.json()["result"] | |
# check nested query vector + lookup_from | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/groups", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": [ | |
{ | |
"query": { | |
"recommend": { | |
"positive": [[10.0, 10.0, 10.0, 10.0]], | |
"negative": [[20.0, 0.0, 0.0, 0.0]], | |
}, | |
}, | |
"using": "dense-image", | |
} | |
], | |
"group_by": "city", | |
"group_size": 2, | |
"query": {"fusion": "rrf"}, | |
}, | |
) | |
assert response.ok, response.text | |
nested_query_result_vector = response.json()["result"] | |
# check equivalence nested query id vs nested query vector | |
assert nested_query_result_id == nested_query_result_vector, f"{nested_query_result_id} != {nested_query_result_vector}" | |
def test_random_rescore_with_offset(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": { "limit": 1 }, | |
"query": {"sample": "random"}, | |
}, | |
) | |
assert response.ok, response.json() | |
random_result = response.json()["result"]["points"] | |
assert len(random_result) == 1 | |
assert random_result[0]["id"] == 1 | |
# assert offset is propagated to prefetch | |
seen = set() | |
for _ in range(100): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"prefetch": { "limit": 1 }, | |
"query": {"sample": "random"}, | |
"offset": 1, | |
}, | |
) | |
assert response.ok, response.json() | |
random_result = response.json()["result"]["points"] | |
assert len(random_result) == 1 | |
seen.add(random_result[0]["id"]) | |
if seen == {1, 2}: | |
return | |
# Although prefetch limit is 1, offset should be propagated, so randomness is applied to points 1 and 2. | |
# By this point we should've seen both points. | |
assert False, f"after 100 tries, `seen` is expected to be {{1, 2}}, but it was {seen}" | |
def test_nearest_query_batch(query_filter, collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/search/batch", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"searches": [ | |
{ | |
"limit": 3, | |
"vector": { | |
"vector": [-1.9, 1.1, -1.1, 1.1], | |
"name": "dense-image" | |
}, | |
"with_payload": True, | |
"filter": query_filter | |
}, | |
{ | |
"limit": 3, | |
"vector": { | |
"vector": [0.19, 0.83, 0.75, -0.11], | |
"name": "dense-image", | |
}, | |
"with_payload": True, | |
"filter": query_filter | |
} | |
] | |
}, | |
) | |
assert response.ok, response.text | |
search_result = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/batch", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"searches": [ | |
{ | |
"limit": 3, | |
"query": [-1.9, 1.1, -1.1, 1.1], | |
"using": "dense-image", | |
"with_payload": True, | |
"filter": query_filter | |
}, | |
{ | |
"limit": 3, | |
"query": [0.19, 0.83, 0.75, -0.11], | |
"using": "dense-image", | |
"with_payload": True, | |
"filter": query_filter | |
} | |
] | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"] | |
assert search_result[0] == query_result[0]["points"] | |
assert search_result[1] == query_result[1]["points"] | |
def test_recommend_batch(query_filter, collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/recommend/batch", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"searches": [ | |
{ | |
"positive": [1, 2, 3, 4], | |
"negative": [3], | |
"limit": 10, | |
"using": "dense-image", | |
"filter": query_filter, | |
"with_payload": True, | |
}, | |
{ | |
"positive": [3, 4], | |
"negative": [4], | |
"limit": 10, | |
"using": "dense-image", | |
"filter": query_filter, | |
"with_payload": True, | |
} | |
] | |
}, | |
) | |
assert response.ok, response.text | |
search_result = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/batch", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"searches": [ | |
{ | |
"limit": 10, | |
"query": {"recommend": {"positive": [1, 2, 3, 4], "negative": [3]}}, | |
"using": "dense-image", | |
"with_payload": True, | |
"filter": query_filter | |
}, | |
{ | |
"limit": 10, | |
"query": {"recommend": {"positive": [3, 4], "negative": [4]}}, | |
"using": "dense-image", | |
"with_payload": True, | |
"filter": query_filter | |
} | |
] | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"] | |
assert search_result[0] == query_result[0]["points"] | |
assert search_result[1] == query_result[1]["points"] | |
def test_discover_batch(query_filter, collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/discover/batch", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"searches": [ | |
{ | |
"target": 2, | |
"context": [{"positive": 3, "negative": 4}], | |
"limit": 10, | |
"using": "dense-image", | |
"filter": query_filter, | |
"with_payload": True, | |
}, | |
{ | |
"target": 4, | |
"context": [{"positive": 1, "negative": 2}], | |
"limit": 10, | |
"using": "dense-image", | |
"filter": query_filter, | |
"with_payload": True, | |
} | |
] | |
}, | |
) | |
assert response.ok, response.text | |
search_result = response.json()["result"] | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query/batch", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"searches": [ | |
{ | |
"limit": 10, | |
"query": { | |
"discover": { | |
"target": 2, | |
"context": [{"positive": 3, "negative": 4}], | |
} | |
}, | |
"using": "dense-image", | |
"with_payload": True, | |
"filter": query_filter | |
}, | |
{ | |
"limit": 10, | |
"query": { | |
"discover": { | |
"target": 4, | |
"context": [{"positive": 1, "negative": 2}], | |
} | |
}, | |
"using": "dense-image", | |
"with_payload": True, | |
"filter": query_filter | |
} | |
] | |
}, | |
) | |
assert response.ok, response.text | |
query_result = response.json()["result"] | |
assert search_result[0] == query_result[0]["points"] | |
assert search_result[1] == query_result[1]["points"] | |
# Qdrant did panic for some Query API requests when using a vector name that is not existing | |
# for the given point. This tests ensures that a proper error response gets returned. | |
# See https://github.com/qdrant/qdrant/issues/5208 for more details. | |
def test_query_with_missing_vector(collection_name): | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": 7, | |
"using": "sparse-text" | |
}, | |
) | |
assert response.ok | |
response = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": 8, # Point with ID=8 doesn't have a vector 'sparse-text' which caused Qdrant to panic before. | |
"using": "sparse-text" | |
}, | |
) | |
assert not response.ok | |
assert 'error' in response.json()['status'] | |
response2 = request_with_validation( | |
api="/collections/{collection_name}/points/query", | |
method="POST", | |
path_params={"collection_name": collection_name}, | |
body={ | |
"query": 8, # Point with ID=8 doesn't have a default vector which caused Qdrant to panic before. | |
}, | |
) | |
assert not response2.ok | |
assert 'error' in response2.json()['status'] | |