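"""
Consensus test: point search consistency across a multi-peer cluster.

Starts a 5-peer cluster, creates a sharded and replicated collection,
upserts a handful of points, and checks that `search` and `search_batch`
return identical, stable results on every peer.
"""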
import pathlib

import requests

from .utils import *
from .assertions import assert_http_ok

N_PEERS = 5
N_SHARDS = 4
N_REPLICA = 2


def test_points_search(tmp_path: pathlib.Path):
    assert_project_root()
    peer_dirs = make_peer_folders(tmp_path, N_PEERS)

    # Collect the REST API URIs of all peers
    peer_api_uris = []

    # Start the bootstrap peer
    (bootstrap_api_uri, bootstrap_uri) = start_first_peer(
        peer_dirs[0], "peer_0_0.log")
    peer_api_uris.append(bootstrap_api_uri)

    # Wait for the leader to be elected
    leader = wait_peer_added(bootstrap_api_uri)

    # Start the remaining peers, bootstrapping them from the first one
    for i in range(1, len(peer_dirs)):
        peer_api_uris.append(start_peer(
            peer_dirs[i], f"peer_0_{i}.log", bootstrap_uri))

    # Wait for all peers to agree on the cluster status
    wait_for_uniform_cluster_status(peer_api_uris, leader)

    # Check that there are no collections on any peer yet
    for uri in peer_api_uris:
        r_batch = requests.get(f"{uri}/collections")
        assert_http_ok(r_batch)
        assert len(r_batch.json()["result"]["collections"]) == 0
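    # Creating the collection through a single peer is expected to propagate it
    # via consensus: all N_SHARDS shards, each with N_REPLICA replicas, should
    # become active across the N_PEERS-node cluster.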
    # Create the collection via the first peer
    r_batch = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection", json={
            "vectors": {
                "size": 4,
                "distance": "Dot"
            },
            "shard_number": N_SHARDS,
            "replication_factor": N_REPLICA,
        })
    assert_http_ok(r_batch)

    # Check that it exists and is active on all peers
    wait_collection_exists_and_active_on_all_peers(
        collection_name="test_collection", peer_api_uris=peer_api_uris)

    # Check the collection's cluster info
    collection_cluster_info = get_collection_cluster_info(peer_api_uris[0], "test_collection")
    assert collection_cluster_info["shard_count"] == N_SHARDS
    # Create points in first peer's collection
    r_batch = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection/points?wait=true", json={
            "points": [
                {
                    "id": 1,
                    "vector": [0.05, 0.61, 0.76, 0.74],
                    "payload": {
                        "city": "Berlin",
                        "country": "Germany",
                        "count": 1000000,
                        "square": 12.5,
                        "coords": {"lat": 1.0, "lon": 2.0}
                    }
                },
                {"id": 2, "vector": [0.19, 0.81, 0.75, 0.11],
                 "payload": {"city": ["Berlin", "London"]}},
                {"id": 3, "vector": [0.36, 0.55, 0.47, 0.94],
                 "payload": {"city": ["Berlin", "Moscow"]}},
                {"id": 4, "vector": [0.18, 0.01, 0.85, 0.80],
                 "payload": {"city": ["London", "Moscow"]}},
                {"id": 5, "vector": [0.24, 0.18, 0.22, 0.44],
                 "payload": {"count": [0]}},
                {"id": 6, "vector": [0.35, 0.08, 0.11, 0.44]},
                {"id": 7, "vector": [0.45, 0.07, 0.21, 0.04]},
                {"id": 8, "vector": [0.75, 0.18, 0.91, 0.48]},
                {"id": 9, "vector": [0.30, 0.01, 0.1, 0.12]},
                {"id": 10, "vector": [0.95, 0.8, 0.17, 0.19]}
            ]
        })
    assert_http_ok(r_batch)
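    # `wait=true` on the upsert makes the request block until the changes are
    # applied, so the searches below run against fully written data.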
    # Check that `search` & `search_batch` return the same results on all peers
    q = {
        "vector": [0.2, 0.1, 0.9, 0.7],
        "top": 3,
        "offset": 1,
        "with_vector": True,
        "with_payload": True,
        "score_threshold": 0.1
    }
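    # With `top: 3` and `offset: 1`, each search is expected to skip the single
    # best hit and return the next 3; `score_threshold: 0.1` additionally drops
    # hits scoring below 0.1 (for Dot distance, higher scores are better).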
    # Capture the result from the first peer as the reference
    r_init_search = requests.post(
        f"{peer_api_uris[0]}/collections/test_collection/points/search", json=q
    ).json()["result"]

    for uri in peer_api_uris:
        r_search = requests.post(
            f"{uri}/collections/test_collection/points/search", json=q
        )
        assert_http_ok(r_search)
        r_batch = requests.post(
            f"{uri}/collections/test_collection/points/search/batch", json={
                "searches": [q]
            }
        )
        assert_http_ok(r_batch)
        # Only one search in the batch
        assert len(r_batch.json()["result"]) == 1
        # Same number of results from both endpoints
        assert len(r_search.json()["result"]) == len(r_batch.json()["result"][0])
        # Results are stable across peers
        assert r_search.json()["result"] == r_init_search
        # A single search is equivalent to a one-element batch
        assert r_search.json()["result"] == r_batch.json()["result"][0]
    # Check that `search_batch` returns the same results on all peers for duplicated searches
    for uri in peer_api_uris:
        r_batch = requests.post(
            f"{uri}/collections/test_collection/points/search/batch", json={
                "searches": [q, q, q, q]
            }
        )
        assert_http_ok(r_batch)
        # Assert the number of searches in the batch
        assert len(r_batch.json()["result"]) == 4
        # Assert the search limit is respected for each duplicated search
        assert len(r_batch.json()["result"][0]) == 3
        assert len(r_batch.json()["result"][1]) == 3
        assert len(r_batch.json()["result"][2]) == 3
        assert len(r_batch.json()["result"][3]) == 3
        assert r_batch.json()["result"] == [r_init_search] * 4
    # Check that `search_batch` returns the same results on all peers compared to multiple single searches
    q1 = {
        "vector": [0.2, 0.1, 0.9, 0.7],
        "top": 3,
        "offset": 1,
        "with_vector": True
    }
    q2 = {
        "vector": [0.1, 0.2, 0.9, 0.7],
        "top": 5,
        "offset": 3,
        "with_payload": True,
    }
    q3 = {
        "vector": [0.2, 0.1, 0.7, 0.9],
        "top": 10,
        "score_threshold": 1.1
    }
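    # The three queries deliberately differ in `top`, `offset`, selectors and
    # `score_threshold`, so the batch endpoint is exercised with heterogeneous
    # searches rather than copies of a single query.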
    for uri in peer_api_uris:
        r_batch = requests.post(
            f"{uri}/collections/test_collection/points/search/batch", json={
                "searches": [q1, q2, q3]
            }
        )
        assert_http_ok(r_batch)
        r_search_1 = requests.post(
            f"{uri}/collections/test_collection/points/search", json=q1
        )
        assert_http_ok(r_search_1)
        r_search_2 = requests.post(
            f"{uri}/collections/test_collection/points/search", json=q2
        )
        assert_http_ok(r_search_2)
        r_search_3 = requests.post(
            f"{uri}/collections/test_collection/points/search", json=q3
        )
        assert_http_ok(r_search_3)
        # The batch result must match the accumulated results of the individual searches
        accumulated = [r_search_1.json()["result"], r_search_2.json()["result"], r_search_3.json()["result"]]
        assert accumulated == r_batch.json()["result"]