import pathlib

import requests

from .utils import *
from .assertions import assert_http_ok

N_PEERS = 5
N_SHARDS = 4
N_REPLICA = 2


def test_points_query(tmp_path: pathlib.Path):
    assert_project_root()

    peer_dirs = make_peer_folders(tmp_path, N_PEERS)

    # Gather REST API URIs
    peer_api_uris = []

    # Start the bootstrap peer
    (bootstrap_api_uri, bootstrap_uri) = start_first_peer(peer_dirs[0], "peer_0_0.log")
    peer_api_uris.append(bootstrap_api_uri)

    # Wait for a leader to be elected
    leader = wait_peer_added(bootstrap_api_uri)

    # Start the remaining peers
    for i in range(1, len(peer_dirs)):
        peer_api_uris.append(start_peer(peer_dirs[i], f"peer_0_{i}.log", bootstrap_uri))

    # Wait for the cluster to converge
    wait_for_uniform_cluster_status(peer_api_uris, leader)

    # Check that there are no collections on any peer
    for uri in peer_api_uris:
        r = requests.get(f"{uri}/collections")
        assert_http_ok(r)
        assert len(r.json()["result"]["collections"]) == 0

    # Create a collection through the first peer
    r = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection",
        json={
            "vectors": {"size": 4, "distance": "Dot"},
            "shard_number": N_SHARDS,
            "replication_factor": N_REPLICA,
        },
    )
    assert_http_ok(r)

    # Add an index on the `count` payload field
    r_index = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection/index?wait=true",
        json={"field_name": "count", "field_schema": "integer"},
    )
    assert_http_ok(r_index)

    # Check that the collection exists and is active on all peers
    wait_collection_exists_and_active_on_all_peers(
        collection_name="test_collection", peer_api_uris=peer_api_uris
    )

    # Check the collection's cluster info
    collection_cluster_info = get_collection_cluster_info(peer_api_uris[0], "test_collection")
    assert collection_cluster_info["shard_count"] == N_SHARDS

    # Insert points through the first peer
    r = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection/points?wait=true",
        json={
            "points": [
                {
                    "id": 1,
                    "vector": [0.05, 0.61, 0.76, 0.74],
                    "payload": {
                        "city": "Berlin",
                        "country": "Germany",
                        "count": 1000000,
                        "square": 12.5,
                        "coords": {"lat": 1.0, "lon": 2.0},
                    },
                },
                {
                    "id": 2,
                    "vector": [0.19, 0.81, 0.75, 0.11],
                    "payload": {"city": ["Berlin", "London"]},
                },
                {
                    "id": 3,
                    "vector": [0.36, 0.55, 0.47, 0.94],
                    "payload": {"city": ["Berlin", "Moscow"], "count": 2},
                },
                {
                    "id": 4,
                    "vector": [0.18, 0.01, 0.85, 0.80],
                    "payload": {"city": ["London", "Moscow"]},
                },
                {"id": 5, "vector": [0.24, 0.18, 0.22, 0.44], "payload": {"count": 1}},
                {"id": 6, "vector": [0.35, 0.08, 0.11, 0.44], "payload": {"count": 4}},
                {"id": 7, "vector": [0.45, 0.07, 0.21, 0.04], "payload": {"count": 2}},
                {"id": 8, "vector": [0.75, 0.18, 0.91, 0.48]},
                {"id": 9, "vector": [0.30, 0.01, 0.1, 0.12], "payload": {"count": 3}},
                {"id": 10, "vector": [0.95, 0.8, 0.17, 0.19], "payload": {"count": 3}},
            ]
        },
    )
    assert_http_ok(r)

    # A filter to reuse in multiple requests (named to avoid shadowing the `filter` builtin)
    search_filter = {
        "must_not": [
            {"key": "city", "match": {"value": "Berlin"}},
        ]
    }

    # Pairs of requests that should produce the same results.
    # Each element is ("request path", "extract key", "request body").
    list_of_equivalences = [
        # Nearest search & query with filter
        (
            (
                "search",
                None,
                {
                    "vector": [0.2, 0.1, 0.9, 0.7],
                    "limit": 5,
                    "offset": 1,
                    "filter": search_filter,
                    "with_vector": True,
                    "with_payload": True,
                    "score_threshold": 0.5,
                },
            ),
            (
                "query",
                None,
                {
                    "query": [0.2, 0.1, 0.9, 0.7],
                    "limit": 5,
                    "offset": 1,
                    "filter": search_filter,
                    "with_vector": True,
                    "with_payload": True,
                    "score_threshold": 0.5,
                },
            ),
        ),
        # Recommend & query recommend
        (
            (
                "recommend",
                None,
                {
                    "positive": [1, 2, 3, 4],
                    "negative": [3],
                    "limit": 5,
                },
            ),
            (
                "query",
                None,
                {
                    "query": {
                        "recommend": {
                            "positive": [1, 2, 3, 4],
                            "negative": [3],
                        }
                    },
                    "limit": 5,
                },
            ),
        ),
        # Discover & query discover
        (
            (
                "discover",
                None,
                {
                    "target": 2,
                    "context": [{"positive": 3, "negative": 4}],
                    "limit": 5,
                },
            ),
            (
                "query",
                None,
                {
                    "query": {
                        "discover": {
                            "target": 2,
                            "context": [{"positive": 3, "negative": 4}],
                        }
                    },
                    "limit": 5,
                },
            ),
        ),
        # Context & query context
        (
            (
                "discover",
                None,
                {
                    "context": [{"positive": 2, "negative": 4}],
                    "limit": 5,
                },
            ),
            (
                "query",
                None,
                {
                    "query": {"context": [{"positive": 2, "negative": 4}]},
                    "limit": 5,
                },
            ),
        ),
        # Request-level filter & prefetch-level (source) filter
        (
            (
                "query",
                None,
                {
                    "prefetch": [
                        {
                            "query": [0.2, 0.1, 0.9, 0.7],
                            "filter": search_filter,
                        }
                    ],
                    "query": {"fusion": "rrf"},
                    "limit": 5,
                    "offset": 1,
                    "with_vector": True,
                    "with_payload": True,
                    "score_threshold": 0.5,
                },
            ),
            (
                "query",
                None,
                {
                    "prefetch": [
                        {
                            "query": [0.2, 0.1, 0.9, 0.7],
                        }
                    ],
                    "query": {"fusion": "rrf"},
                    "limit": 5,
                    "offset": 1,
                    "filter": search_filter,
                    "with_vector": True,
                    "with_payload": True,
                    "score_threshold": 0.5,
                },
            ),
        ),
        # Scroll
        (
            (
                "scroll",
                "points.id",
                {
                    "filter": search_filter,
                    "limit": 5,
                },
            ),
            (
                "query",
                "id",
                {
                    "filter": search_filter,
                    "limit": 5,
                    "with_payload": True,
                },
            ),
        ),
        # Scroll, ordered by `count` ascending
        (
            (
                "scroll",
                "points.id",
                {
                    "filter": search_filter,
                    "limit": 5,
                    "order_by": "count",
                    "direction": "asc",
                },
            ),
            (
                "query",
                "id",
                {
                    "filter": search_filter,
                    "limit": 5,
                    "query": {
                        "order_by": {
                            "key": "count",
                            "direction": "asc",
                        }
                    },
                },
            ),
        ),
        # Scroll, ordered by `count` descending
        (
            (
                "scroll",
                "points.id",
                {
                    "filter": search_filter,
                    "limit": 5,
                    "order_by": "count",
                    "direction": "desc",
                },
            ),
            (
                "query",
                "id",
                {
                    "filter": search_filter,
                    "limit": 5,
                    "query": {
                        "order_by": {
                            "key": "count",
                            "direction": "desc",
                        }
                    },
                },
            ),
        ),
    ]

    # Verify that the results are the same across all peers
    for (action1, extract1, body1), (action2, extract2, body2) in list_of_equivalences:
        # Capture the reference result from the first peer
        r_init_one = requests.post(
            f"{peer_api_uris[0]}/collections/test_collection/points/{action1}",
            params={"consistency": "all"},
            json=body1,
        )
        assert_http_ok(r_init_one)
        r_init_one = get_results(action1, r_init_one.json())
        if extract1:
            r_init_one = apply_json_path(r_init_one, extract1)

        # Loop through all peers
        for uri in peer_api_uris:
            # First request of the pair
            r_one = requests.post(
                f"{uri}/collections/test_collection/points/{action1}",
                params={"consistency": "all"},
                json=body1,
            )
            assert_http_ok(r_one)
            r_one = get_results(action1, r_one.json())
            if extract1:
                r_one = apply_json_path(r_one, extract1)

            # Second request of the pair
            r_two = requests.post(
                f"{uri}/collections/test_collection/points/{action2}",
                params={"consistency": "all"},
                json=body2,
            )
            assert_http_ok(r_two)
            r_two = get_results(action2, r_two.json())
            if extract2:
                r_two = apply_json_path(r_two, extract2)

            # Assert the same number of results
            assert len(r_one) == len(r_two), f"Different number of results for {action1} and {action2}"

            # Assert both requests return equivalent results
            assert set(str(d) for d in r_one) == set(
                str(d) for d in r_two
            ), f"Different results for {action1} and {action2}"

            # Assert results are stable across peers
            assert set(str(d) for d in r_one) == set(
                str(d) for d in r_init_one
            ), f"Different results for {action1} and {action2} across peers"


def get_results(action_name, res_json):
    # `query` responses nest the result list under "points"
    if action_name == "query":
        return res_json["result"]["points"]
    return res_json["result"]


def apply_json_path(json_obj, json_path):
    # Extract a nested value by a dot-separated path, mapping over lists
    if json_path is None:
        return json_obj
    for key in json_path.split("."):
        if isinstance(json_obj, list):
            return [item[key] for item in json_obj]
        json_obj = json_obj[key]
    return json_obj