colibri.qdrant / tests /openapi /test_query_full.py
Gouzi Mohaled
Ajout du dossier tests
3932407
import uuid
from math import isclose
import pytest
from .helpers.collection_setup import drop_collection, full_collection_setup
from .helpers.helpers import reciprocal_rank_fusion, request_with_validation
uuid_1 = str(uuid.uuid4())
uuid_2 = str(uuid.uuid4())
uuid_3 = str(uuid.uuid4())
@pytest.fixture(scope='module', autouse=True)
def lookup_collection_name(collection_name):
return f"{collection_name}_lookup"
@pytest.fixture(autouse=True, scope="module")
def setup(on_disk_vectors, collection_name):
full_collection_setup(collection_name=collection_name, on_disk_vectors=on_disk_vectors)
# keyword index on `city`
response = request_with_validation(
api="/collections/{collection_name}/index",
method="PUT",
query_params={'wait': 'true'},
path_params={"collection_name": collection_name},
body={"field_name": "city", "field_schema": "keyword"},
)
assert response.ok, response.text
# integer index on count
response = request_with_validation(
api="/collections/{collection_name}/index",
method="PUT",
query_params={'wait': 'true'},
path_params={"collection_name": collection_name},
body={"field_name": "count", "field_schema": "integer"},
)
assert response.ok, response.text
# UUID index
def set_payload(payload, points):
response = request_with_validation(
api='/collections/{collection_name}/points/payload',
method="POST",
path_params={'collection_name': collection_name},
query_params={'wait': 'true'},
body={
"payload": payload,
"points": points
}
)
assert response.ok
# create payload
set_payload({"uuid": uuid_1}, [1])
set_payload({"uuid": uuid_2}, [2])
set_payload({"uuid": uuid_3}, [3])
# Create index
response = request_with_validation(
api='/collections/{collection_name}/index',
method="PUT",
path_params={'collection_name': collection_name},
query_params={'wait': 'true'},
body={
"field_name": "uuid",
"field_schema": "uuid"
}
)
assert response.ok
yield
drop_collection(collection_name=collection_name)
def root_and_rescored_query(collection_name, query, using, filter=None, limit=None, with_payload=None):
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": query,
"limit": limit,
"filter": filter,
"with_payload": with_payload,
"using": using,
},
)
assert response.ok, response.text
root_query_result = response.json()["result"]["points"]
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": {
"limit": 1000,
},
"query": query,
"filter": filter,
"with_payload": with_payload,
"using": using,
},
)
assert response.ok, response.text
nested_query_result = response.json()["result"]["points"]
assert root_query_result == nested_query_result
return root_query_result
def test_query_validation(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": {
"name": "dense-image",
"vector": [0.1, 0.2, 0.3, 0.4],
},
"using": "dense-image",
"query": {"fusion": "rrf"},
"limit": 10,
},
)
assert not response.ok, response.text
assert response.json()["status"]["error"] == "Bad request: Fusion queries cannot be combined with the 'using' field."
def test_query_by_vector(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/search",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": {
"name": "dense-image",
"vector": [0.1, 0.2, 0.3, 0.4],
},
"limit": 10,
},
)
assert response.ok, response.text
search_result = response.json()["result"]
default_query_result = root_and_rescored_query(collection_name, [0.1, 0.2, 0.3, 0.4], "dense-image")
nearest_query_result = root_and_rescored_query(collection_name, {"nearest": [0.1, 0.2, 0.3, 0.4]}, "dense-image")
assert search_result == default_query_result
assert search_result == nearest_query_result
def test_query_by_id(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": 2,
"using": "dense-image",
},
)
assert response.ok, response.text
by_id_query_result = response.json()["result"]["points"]
top = by_id_query_result[0]
assert top["id"] != 2 # id 2 is excluded from the results
def test_filtered_query(collection_name):
filters = [
{
"must": [
{
"key": "city",
"match": {
"value": "Berlin"
}
}
]
},
{
"must_not": [
{
"key": "city",
"match": {
"value": "Berlin"
}
}
]
},
{
"should": [
{
"key": "city",
"match": {
"value": "Berlin"
}
}
]
},
{
"min_should": {
"conditions": [
{
"key": "city",
"match": {
"any": ["Berlin", "Moscow"]
}
},
{
"key": "count",
"match": {
"value": 0
}
}
],
"min_count": 2
}
}
]
for filter in filters:
response = request_with_validation(
api="/collections/{collection_name}/points/search",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": {
"name": "dense-image",
"vector": [0.1, 0.2, 0.3, 0.4],
},
"filter": filter,
"limit": 10,
},
)
assert response.ok, response.text
search_result = response.json()["result"]
default_query_result = root_and_rescored_query(collection_name, [0.1, 0.2, 0.3, 0.4], "dense-image", filter)
nearest_query_result = root_and_rescored_query(collection_name, {"nearest": [0.1, 0.2, 0.3, 0.4]}, "dense-image", filter)
assert search_result == default_query_result
assert search_result == nearest_query_result
def test_uuid_index_filtered_query(collection_name):
filters_arr = get_uuid_index_filters()
for item in filters_arr:
response = request_with_validation(
api="/collections/{collection_name}/points/search",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": {"vector": [0.1, 0.2, 0.3, 0.4], "name": "dense-image"},
"filter": item,
"limit": 10,
},
)
assert response.ok, f"{response.text}\n{item}"
search_result = response.json()["result"]
default_query_result = root_and_rescored_query(collection_name, [0.1, 0.2, 0.3, 0.4], "dense-image", filter=item)
nearest_query_result = root_and_rescored_query(collection_name, {"nearest": [0.1, 0.2, 0.3, 0.4]}, "dense-image", filter=item)
assert search_result == default_query_result
assert search_result == nearest_query_result
def test_scroll(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/scroll",
method="POST",
path_params={"collection_name": collection_name},
body={},
)
assert response.ok, response.text
scroll_result = response.json()["result"]["points"]
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"with_payload": True,
},
)
assert response.ok, response.text
query_result = response.json()["result"]["points"]
for record, scored_point in zip(scroll_result, query_result):
assert record.get("id") == scored_point.get("id")
assert record.get("payload") == scored_point.get("payload")
def test_filtered_scroll(collection_name):
filter = {
"must": [
{
"key": "city",
"match": {
"value": "Berlin"
}
}
]
}
response = request_with_validation(
api="/collections/{collection_name}/points/scroll",
method="POST",
path_params={"collection_name": collection_name},
body={
"filter": filter
},
)
assert response.ok, response.text
scroll_result = response.json()["result"]["points"]
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"with_payload": True,
"filter": filter
},
)
assert response.ok, response.text
query_result = response.json()["result"]["points"]
for record, scored_point in zip(scroll_result, query_result):
assert record.get("id") == scored_point.get("id")
assert record.get("payload") == scored_point.get("payload")
def get_uuid_index_filters():
# Check different filters
filters_arr = []
match_conditions = [
{"value": uuid_1},
{"text": uuid_2},
{"any": [uuid_1, uuid_2]},
{"except": [uuid_1, uuid_2]}
]
for item in ["must", "must_not", "should"]:
for condition in match_conditions:
filters_arr.append(
{
item: [
{
"key": "uuid",
"match": condition
}
]
}
)
# min_should
for condition in match_conditions:
filters_arr.append(
{
"min_should": {
"conditions": [
{
"key": "uuid",
"match": condition
}
],
"min_count": 2
}
}
)
return filters_arr
@pytest.mark.parametrize("query_filter", [None, *get_uuid_index_filters()])
def test_recommend_avg(query_filter, collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/recommend",
method="POST",
path_params={"collection_name": collection_name},
body={
"positive": [1, 2, 3, 4],
"negative": [3],
"limit": 10,
"using": "dense-image",
"filter": query_filter
},
)
assert response.ok, response.text
recommend_result = response.json()["result"]
query_result = root_and_rescored_query(collection_name,
{
"recommend": {"positive": [1, 2, 3, 4], "negative": [3]},
},
"dense-image",
filter=query_filter
)
assert recommend_result == query_result
def test_recommend_lookup_validations(collection_name, lookup_collection_name):
# delete lookup collection if exists
response = request_with_validation(
api='/collections/{collection_name}',
method="DELETE",
path_params={'collection_name': lookup_collection_name},
)
assert response.ok, response.text
# re-create lookup collection
response = request_with_validation(
api='/collections/{collection_name}',
method="PUT",
path_params={'collection_name': lookup_collection_name},
body={
"vectors": {
"other": {
"size": 4,
"distance": "Dot",
}
}
}
)
assert response.ok, response.text
# insert vectors to lookup collection
response = request_with_validation(
api='/collections/{collection_name}/points',
method="PUT",
path_params={'collection_name': lookup_collection_name},
query_params={'wait': 'true'},
body={
"points": [
{
"id": 1,
"vector": {"other": [1.0, 0.0, 0.0, 0.0]},
},
{
"id": 2,
"vector": {"other": [0.0, 0.0, 0.0, 2.0]},
},
]
}
)
assert response.ok, response.text
# check query + lookup_from non-existing id
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"recommend": {
"positive": [1],
"negative": [2, 3],
},
},
"limit": 10,
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "other"
}
},
)
assert not response.ok, response.text
assert response.json()["status"]["error"] == "Not found: No point with id 3 found"
# check query + lookup_from non-existing collection
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"recommend": {
"positive": [1],
"negative": [2],
},
},
"limit": 10,
"using": "dense-image",
"lookup_from": {
"collection": "non-existing-collection",
"vector": "other"
}
},
)
assert not response.ok, response.text
assert response.json()["status"]["error"] == "Not found: Collection non-existing-collection not found"
# check query + lookup_from non-existing vector
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"recommend": {
"positive": [1],
"negative": [2],
},
},
"limit": 10,
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "non-existing-vector"
}
},
)
assert not response.ok, response.text
assert response.json()["status"]["error"] == "Wrong input: Not existing vector name error: non-existing-vector"
# check nested query + lookup_from non-existing id
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": {
"recommend": {
"positive": [1],
"negative": [2, 3],
},
},
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "other"
}
}
],
"limit": 10,
"using": "dense-image",
"query": {"fusion": "rrf"}
},
)
assert not response.ok, response.text
assert response.json()["status"]["error"] == "Not found: No point with id 3 found"
# check nested query + lookup_from non-existing collection
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": {
"recommend": {
"positive": [1],
"negative": [2],
},
},
"using": "dense-image",
"lookup_from": {
"collection": "non-existing-collection",
"vector": "other"
}
}
],
"limit": 10,
"using": "dense-image",
"query": {"fusion": "rrf"}
},
)
assert not response.ok, response.text
assert response.json()["status"]["error"] == "Not found: Collection non-existing-collection not found"
# check nested query + lookup_from non-existing vector
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": {
"recommend": {
"positive": [1],
"negative": [2],
},
},
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "non-existing-vector"
}
}
],
"limit": 10,
"using": "dense-image",
"query": {"fusion": "rrf"}
},
)
assert not response.ok, response.text
assert response.json()["status"]["error"] == "Wrong input: Not existing vector name error: non-existing-vector"
def test_recommend_lookup(collection_name, lookup_collection_name):
# delete lookup collection if exists
response = request_with_validation(
api='/collections/{collection_name}',
method="DELETE",
path_params={'collection_name': lookup_collection_name},
)
assert response.ok, response.text
# re-create lookup collection
response = request_with_validation(
api='/collections/{collection_name}',
method="PUT",
path_params={'collection_name': lookup_collection_name},
body={
"vectors": {
"other": {
"size": 4,
"distance": "Dot",
}
}
}
)
assert response.ok, response.text
# insert vectors to lookup collection
response = request_with_validation(
api='/collections/{collection_name}/points',
method="PUT",
path_params={'collection_name': lookup_collection_name},
query_params={'wait': 'true'},
body={
"points": [
{
"id": 1,
"vector": {"other": [1.0, 0.0, 0.0, 0.0]},
},
{
"id": 2,
"vector": {"other": [0.0, 0.0, 0.0, 2.0]},
},
]
}
)
assert response.ok, response.text
# check recommend + lookup_from
response = request_with_validation(
api="/collections/{collection_name}/points/recommend",
method="POST",
path_params={"collection_name": collection_name},
body={
"positive": [1],
"negative": [2],
"limit": 10,
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "other"
}
},
)
assert response.ok, response.text
recommend_result = response.json()["result"]
# check query + lookup_from
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"recommend": {
"positive": [1],
"negative": [2],
},
},
"limit": 10,
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "other"
}
},
)
assert response.ok, response.text
query_result = response.json()["result"]["points"]
# check equivalence recommend vs query
assert recommend_result == query_result, f"{recommend_result} != {query_result}"
# check nested query id + lookup_from
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": {
"recommend": {
"positive": [1],
"negative": [2],
},
},
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "other"
}
}
],
"query": {"fusion": "rrf"}
},
)
assert response.ok, response.text
nested_query_result_id = response.json()["result"]["points"]
# check nested query vector + lookup_from
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": {
"recommend": {
"positive": [[1.0, 0.0, 0.0, 0.0]],
"negative": [[0.0, 0.0, 0.0, 2.0]],
},
},
"using": "dense-image",
}
],
"query": {"fusion": "rrf"}
},
)
assert response.ok, response.text
nested_query_result_vector = response.json()["result"]["points"]
# check equivalence nested query id vs nested query vector
assert nested_query_result_id == nested_query_result_vector, f"{nested_query_result_id} != {nested_query_result_vector}"
def test_recommend_best_score(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/recommend",
method="POST",
path_params={"collection_name": collection_name},
body={
"positive": [1, 2, 3, 4],
"negative": [3],
"limit": 10,
"strategy": "best_score",
"using": "dense-image",
},
)
assert response.ok, response.text
recommend_result = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"recommend": {
"positive": [1, 2, 3, 4],
"negative": [3],
"strategy": "best_score",
},
},
"using": "dense-image",
},
)
assert response.ok, response.text
query_result = response.json()["result"]["points"]
assert recommend_result == query_result
@pytest.mark.parametrize("query_filter", [None, *get_uuid_index_filters()])
def test_discover(query_filter, collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/discover",
method="POST",
path_params={"collection_name": collection_name},
body={
"target": 2,
"context": [{"positive": 3, "negative": 4}],
"limit": 10,
"using": "dense-image",
"filter": query_filter
},
)
assert response.ok, response.text
discover_result = response.json()["result"]
query_result = root_and_rescored_query(collection_name,
{
"discover": {
"target": 2,
"context": [{"positive": 3, "negative": 4}],
}
},
"dense-image",
filter=query_filter
)
assert discover_result == query_result
def test_context(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/discover",
method="POST",
path_params={"collection_name": collection_name},
body={
"context": [{"positive": 2, "negative": 4}],
"limit": 100,
"using": "dense-image",
},
)
assert response.ok, response.text
context_result = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"context": [{"positive": 2, "negative": 4}],
},
"limit": 100,
"using": "dense-image",
},
)
assert response.ok, response.text
query_result = response.json()["result"]["points"]
assert set([p["id"] for p in context_result]) == set([p["id"] for p in query_result])
def test_order_by(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/scroll",
method="POST",
path_params={"collection_name": collection_name},
body={
"order_by": "count",
},
)
assert response.ok, response.text
scroll_result = response.json()["result"]["points"]
query_result = root_and_rescored_query(collection_name, {"order_by": "count"}, "dense-image", with_payload=True)
for record, scored_point in zip(scroll_result, query_result):
assert record.get("id") == scored_point.get("id")
assert record.get("payload") == scored_point.get("payload")
def test_rrf(collection_name):
filter = {
"must": [
{
"key": "city",
"match": {
"value": "Berlin"
}
}
]
}
response = request_with_validation(
api="/collections/{collection_name}/points/search",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": {
"name": "dense-image",
"vector": [0.1, 0.2, 0.3, 0.4]
},
"filter": filter,
"limit": 10,
},
)
assert response.ok, response.text
search_result_1 = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/search",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": {
"name": "dense-text",
"vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
},
"filter": filter,
"limit": 10,
},
)
assert response.ok, response.text
search_result_2 = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/search",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": {
"name": "sparse-text",
"vector": {
"indices": [63, 65, 66],
"values": [1, 2.2, 3.3],
}
},
"filter": filter,
"limit": 10,
},
)
assert response.ok, response.text
search_result_3 = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/search",
method="POST",
path_params={"collection_name": collection_name},
body={
"vector": {
"name": "dense-multi",
"vector": [3.05, 3.61, 3.76, 3.74], # legacy API expands single vector to multiple vectors
},
"filter": filter,
"limit": 10,
},
)
assert response.ok, response.text
search_result_4 = response.json()["result"]
rrf_expected = reciprocal_rank_fusion([search_result_1, search_result_2, search_result_3, search_result_4], limit=10)
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": [0.1, 0.2, 0.3, 0.4],
"using": "dense-image"
},
{
"query": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
"using": "dense-text"
},
{
"query": {
"indices": [63, 65, 66],
"values": [1, 2.2, 3.3],
},
"using": "sparse-text",
},
{
"query": [[3.05, 3.61, 3.76, 3.74]],
"using": "dense-multi"
},
],
"filter": filter,
"limit": 10,
"query": {"fusion": "rrf"}
},
)
assert response.ok, response.json()
rrf_result = response.json()["result"]["points"]
def get_id(x):
return x["id"]
# rrf order is not deterministic with same scores, so we need to sort by id
for expected, result in zip(sorted(rrf_expected, key=get_id), sorted(rrf_result, key=get_id)):
assert expected["id"] == result["id"]
assert expected.get("payload") == result.get("payload")
assert isclose(expected["score"], result["score"], rel_tol=1e-5)
# test with inner filters instead of root filter
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": [0.1, 0.2, 0.3, 0.4],
"using": "dense-image",
"filter": filter,
},
{
"query": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
"using": "dense-text",
"filter": filter,
},
{
"query": {
"indices": [63, 65, 66],
"values": [1, 2.2, 3.3],
},
"using": "sparse-text",
"filter": filter,
},
{
"query": [[3.05, 3.61, 3.76, 3.74]],
"using": "dense-multi",
"filter": filter,
},
],
"limit": 10,
"query": {"fusion": "rrf"}
},
)
assert response.ok, response.json()
rrf_inner_filter_result = response.json()["result"]["points"]
for expected, result in zip(sorted(rrf_expected, key=get_id), sorted(rrf_inner_filter_result, key=get_id)):
assert expected["id"] == result["id"]
assert expected.get("payload") == result.get("payload")
assert isclose(expected["score"], result["score"], rel_tol=1e-5)
def test_sparse_dense_rerank_colbert(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": [0.1, 0.2, 0.3, 0.4],
"using": "dense-image"
},
{
"query": {
"indices": [63, 65, 66],
"values": [1, 2.2, 3.3],
},
"using": "sparse-text",
}
],
"limit": 3,
"query": [[3.05, 3.61, 3.76, 3.74]],
"using": "dense-multi"
},
)
assert response.ok, response.json()
rerank_result = response.json()["result"]["points"]
assert len(rerank_result) == 3
# record current result to detect change
assert rerank_result[0]["id"] == 5
assert rerank_result[1]["id"] == 2
assert rerank_result[2]["id"] == 1
def test_nearest_query_group(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/query/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [],
"limit": 3,
"query": [-1.9, 1.1, -1.1, 1.1],
"using": "dense-image",
"with_payload": True,
"group_by": "city",
"group_size": 2,
},
)
groups = response.json()["result"]["groups"]
# found 3 groups has requested with `limit`
assert len(groups) == 3
# group 1
assert groups[0]["id"] == "Berlin"
assert len(groups[0]["hits"]) == 2 # group_size
assert groups[0]["hits"][0]["id"] == 1
assert groups[0]["hits"][0]["payload"]["city"] == "Berlin"
assert groups[0]["hits"][1]["id"] == 3
assert groups[0]["hits"][1]["payload"]["city"] == ["Berlin", "Moscow"]
# group 2
assert groups[1]["id"] == "Moscow"
assert len(groups[1]["hits"]) == 2 # group_size
assert groups[1]["hits"][0]["id"] == 3
assert groups[1]["hits"][0]["payload"]["city"] == ["Berlin", "Moscow"]
assert groups[1]["hits"][1]["id"] == 4
assert groups[1]["hits"][1]["payload"]["city"] == ["London", "Moscow"]
# group 3
assert groups[2]["id"] == "London"
assert len(groups[2]["hits"]) == 2 # group_size
assert groups[2]["hits"][0]["id"] == 2
assert groups[2]["hits"][0]["payload"]["city"] == ["Berlin", "London"]
assert groups[2]["hits"][1]["id"] == 4
assert groups[2]["hits"][1]["payload"]["city"] == ["London", "Moscow"]
@pytest.mark.parametrize("strategy", [
"best_score",
"average_vector",
])
def test_recommend_group(strategy, collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/recommend/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"positive": [1, 2, 3, 4],
"negative": [3],
"limit": 10,
"using": "dense-image",
"with_payload": True,
"strategy": strategy,
"group_by": "city",
"group_size": 2,
},
)
assert response.ok, response.text
recommend_result = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/query/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"recommend": {
"positive": [1, 2, 3, 4],
"negative": [3],
},
},
"limit": 10,
"using": "dense-image",
"with_payload": True,
"strategy": strategy,
"group_by": "city",
"group_size": 2,
},
)
assert response.ok, response.text
query_result = response.json()["result"]
assert recommend_result == query_result, f"{recommend_result} != {query_result}"
@pytest.mark.parametrize("direction", [
"asc",
"desc",
])
def test_order_by_group(direction, collection_name):
# will check equivalence of scroll and query result with order_by group
# where query uses a single result per group
response = request_with_validation(
api="/collections/{collection_name}/points/scroll",
method="POST",
path_params={"collection_name": collection_name},
body={
"order_by": {
"key": "count",
"direction": direction,
},
"limit": 50,
},
)
assert response.ok, response.text
scroll_result = response.json()["result"]["points"]
# keep only first result per payload value
seen_payloads = set()
filtered_scroll_result = []
for record in scroll_result:
if record["payload"]["count"] in seen_payloads:
continue
else:
seen_payloads.add(record["payload"]["count"])
filtered_scroll_result.append(record)
response = request_with_validation(
api="/collections/{collection_name}/points/query/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"order_by": {
"key": "count",
"direction": direction,
}
},
"limit": 10,
"using": "dense-image",
"with_payload": True,
"group_by": "count",
"group_size": 1,
},
)
assert response.ok, response.text
query_result = response.json()["result"]
# flatten group result to match scroll result
flatten_query_result = []
for group in query_result["groups"]:
flatten_query_result.extend(group["hits"])
for record, scored_point in zip(filtered_scroll_result, flatten_query_result):
assert record.get("id") == scored_point.get("id")
assert record.get("payload") == scored_point.get("payload")
def test_discover_group(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/query/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [],
"limit": 2,
"query": {
"discover": {
"target": 5,
"context": [{"positive": 3, "negative": 4}],
}
},
"using": "dense-image",
"with_payload": True,
"group_by": "city",
"group_size": 2,
},
)
assert response.ok, response.text
groups = response.json()["result"]["groups"]
# found 2 groups has requested with `limit`
assert len(groups) == 2
# group 1
assert groups[0]["id"] == "Berlin"
assert len(groups[0]["hits"]) == 2 # group_size
assert groups[0]["hits"][0]["id"] == 1
assert groups[0]["hits"][0]["payload"]["city"] == "Berlin"
assert groups[0]["hits"][1]["id"] == 2
assert groups[0]["hits"][1]["payload"]["city"] == ["Berlin", "London"]
# group 2
assert groups[1]["id"] == "London"
assert len(groups[1]["hits"]) == 1
assert groups[1]["hits"][0]["id"] == 2
assert groups[1]["hits"][0]["payload"]["city"] == ["Berlin", "London"]
def test_recommend_lookup_group(collection_name, lookup_collection_name):
# delete lookup collection if exists
response = request_with_validation(
api='/collections/{collection_name}',
method="DELETE",
path_params={'collection_name': lookup_collection_name},
)
assert response.ok, response.text
# re-create lookup collection
response = request_with_validation(
api='/collections/{collection_name}',
method="PUT",
path_params={'collection_name': lookup_collection_name},
body={
"vectors": {
"other": {
"size": 4,
"distance": "Dot",
}
}
}
)
assert response.ok, response.text
# insert vectors to lookup collection
response = request_with_validation(
api='/collections/{collection_name}/points',
method="PUT",
path_params={'collection_name': lookup_collection_name},
query_params={'wait': 'true'},
body={
"points": [
{
"id": 1,
"vector": {"other": [10.0, 10.0, 10.0, 10.0]},
},
{
"id": 2,
"vector": {"other": [20.0, 0.0, 0.0, 0.0]},
},
]
}
)
assert response.ok, response.text
# check recommend group + lookup_from
response = request_with_validation(
api="/collections/{collection_name}/points/recommend/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"positive": [1],
"negative": [2],
"limit": 10,
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "other"
},
"group_by": "city",
"group_size": 2,
},
)
assert response.ok, response.text
recommend_result = response.json()["result"]
# check query + lookup_from
response = request_with_validation(
api="/collections/{collection_name}/points/query/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": {
"recommend": {
"positive": [1],
"negative": [2],
},
},
"limit": 10,
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "other"
},
"group_by": "city",
"group_size": 2,
},
)
assert response.ok, response.text
query_result = response.json()["result"]
# check equivalence recommend vs query
assert recommend_result == query_result, f"{recommend_result} != {query_result}"
# check nested query id + lookup_from
response = request_with_validation(
api="/collections/{collection_name}/points/query/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": {
"recommend": {
"positive": [1],
"negative": [2],
},
},
"using": "dense-image",
"lookup_from": {
"collection": lookup_collection_name,
"vector": "other"
},
}
],
"group_by": "city",
"group_size": 2,
"query": {"fusion": "rrf"},
},
)
assert response.ok, response.text
nested_query_result_id = response.json()["result"]
# check nested query vector + lookup_from
response = request_with_validation(
api="/collections/{collection_name}/points/query/groups",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": [
{
"query": {
"recommend": {
"positive": [[10.0, 10.0, 10.0, 10.0]],
"negative": [[20.0, 0.0, 0.0, 0.0]],
},
},
"using": "dense-image",
}
],
"group_by": "city",
"group_size": 2,
"query": {"fusion": "rrf"},
},
)
assert response.ok, response.text
nested_query_result_vector = response.json()["result"]
# check equivalence nested query id vs nested query vector
assert nested_query_result_id == nested_query_result_vector, f"{nested_query_result_id} != {nested_query_result_vector}"
def test_random_rescore_with_offset(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": { "limit": 1 },
"query": {"sample": "random"},
},
)
assert response.ok, response.json()
random_result = response.json()["result"]["points"]
assert len(random_result) == 1
assert random_result[0]["id"] == 1
# assert offset is propagated to prefetch
seen = set()
for _ in range(100):
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"prefetch": { "limit": 1 },
"query": {"sample": "random"},
"offset": 1,
},
)
assert response.ok, response.json()
random_result = response.json()["result"]["points"]
assert len(random_result) == 1
seen.add(random_result[0]["id"])
if seen == {1, 2}:
return
# Although prefetch limit is 1, offset should be propagated, so randomness is applied to points 1 and 2.
# By this point we should've seen both points.
assert False, f"after 100 tries, `seen` is expected to be {{1, 2}}, but it was {seen}"
@pytest.mark.parametrize("query_filter", [None, *get_uuid_index_filters()])
def test_nearest_query_batch(query_filter, collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/search/batch",
method="POST",
path_params={"collection_name": collection_name},
body={
"searches": [
{
"limit": 3,
"vector": {
"vector": [-1.9, 1.1, -1.1, 1.1],
"name": "dense-image"
},
"with_payload": True,
"filter": query_filter
},
{
"limit": 3,
"vector": {
"vector": [0.19, 0.83, 0.75, -0.11],
"name": "dense-image",
},
"with_payload": True,
"filter": query_filter
}
]
},
)
assert response.ok, response.text
search_result = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/query/batch",
method="POST",
path_params={"collection_name": collection_name},
body={
"searches": [
{
"limit": 3,
"query": [-1.9, 1.1, -1.1, 1.1],
"using": "dense-image",
"with_payload": True,
"filter": query_filter
},
{
"limit": 3,
"query": [0.19, 0.83, 0.75, -0.11],
"using": "dense-image",
"with_payload": True,
"filter": query_filter
}
]
},
)
assert response.ok, response.text
query_result = response.json()["result"]
assert search_result[0] == query_result[0]["points"]
assert search_result[1] == query_result[1]["points"]
@pytest.mark.parametrize("query_filter", [None, *get_uuid_index_filters()])
def test_recommend_batch(query_filter, collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/recommend/batch",
method="POST",
path_params={"collection_name": collection_name},
body={
"searches": [
{
"positive": [1, 2, 3, 4],
"negative": [3],
"limit": 10,
"using": "dense-image",
"filter": query_filter,
"with_payload": True,
},
{
"positive": [3, 4],
"negative": [4],
"limit": 10,
"using": "dense-image",
"filter": query_filter,
"with_payload": True,
}
]
},
)
assert response.ok, response.text
search_result = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/query/batch",
method="POST",
path_params={"collection_name": collection_name},
body={
"searches": [
{
"limit": 10,
"query": {"recommend": {"positive": [1, 2, 3, 4], "negative": [3]}},
"using": "dense-image",
"with_payload": True,
"filter": query_filter
},
{
"limit": 10,
"query": {"recommend": {"positive": [3, 4], "negative": [4]}},
"using": "dense-image",
"with_payload": True,
"filter": query_filter
}
]
},
)
assert response.ok, response.text
query_result = response.json()["result"]
assert search_result[0] == query_result[0]["points"]
assert search_result[1] == query_result[1]["points"]
@pytest.mark.parametrize("query_filter", [None, *get_uuid_index_filters()])
def test_discover_batch(query_filter, collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/discover/batch",
method="POST",
path_params={"collection_name": collection_name},
body={
"searches": [
{
"target": 2,
"context": [{"positive": 3, "negative": 4}],
"limit": 10,
"using": "dense-image",
"filter": query_filter,
"with_payload": True,
},
{
"target": 4,
"context": [{"positive": 1, "negative": 2}],
"limit": 10,
"using": "dense-image",
"filter": query_filter,
"with_payload": True,
}
]
},
)
assert response.ok, response.text
search_result = response.json()["result"]
response = request_with_validation(
api="/collections/{collection_name}/points/query/batch",
method="POST",
path_params={"collection_name": collection_name},
body={
"searches": [
{
"limit": 10,
"query": {
"discover": {
"target": 2,
"context": [{"positive": 3, "negative": 4}],
}
},
"using": "dense-image",
"with_payload": True,
"filter": query_filter
},
{
"limit": 10,
"query": {
"discover": {
"target": 4,
"context": [{"positive": 1, "negative": 2}],
}
},
"using": "dense-image",
"with_payload": True,
"filter": query_filter
}
]
},
)
assert response.ok, response.text
query_result = response.json()["result"]
assert search_result[0] == query_result[0]["points"]
assert search_result[1] == query_result[1]["points"]
# Qdrant did panic for some Query API requests when using a vector name that is not existing
# for the given point. This tests ensures that a proper error response gets returned.
# See https://github.com/qdrant/qdrant/issues/5208 for more details.
def test_query_with_missing_vector(collection_name):
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": 7,
"using": "sparse-text"
},
)
assert response.ok
response = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": 8, # Point with ID=8 doesn't have a vector 'sparse-text' which caused Qdrant to panic before.
"using": "sparse-text"
},
)
assert not response.ok
assert 'error' in response.json()['status']
response2 = request_with_validation(
api="/collections/{collection_name}/points/query",
method="POST",
path_params={"collection_name": collection_name},
body={
"query": 8, # Point with ID=8 doesn't have a default vector which caused Qdrant to panic before.
},
)
assert not response2.ok
assert 'error' in response2.json()['status']