colibri.qdrant / tests /consensus_tests /test_points_search.py
Gouzi Mohaled
Ajout du dossier tests
3932407
import pathlib
from .utils import *
from .assertions import assert_http_ok
# Number of peer folders created (and therefore peers started) for the test cluster.
N_PEERS = 5
# Value sent as "shard_number" when the test collection is created.
N_SHARDS = 4
# Value sent as "replication_factor" when the test collection is created.
N_REPLICA = 2
def _post_for_result(url: str, body: dict):
    """POST ``body`` as JSON to ``url`` and return the reply's ``result`` field.

    The response is checked with ``assert_http_ok`` *before* it is decoded, so
    a failing request is reported as an HTTP failure rather than as a lookup
    or decoding error on the response body.
    """
    response = requests.post(url, json=body)
    assert_http_ok(response)
    return response.json()["result"]


def test_points_search(tmp_path: pathlib.Path):
    """Check that `search` and `search/batch` agree with each other on every peer.

    The test starts ``N_PEERS`` peers, creates ``test_collection`` with
    ``N_SHARDS`` shards and replication factor ``N_REPLICA`` through the first
    peer, upserts ten points, and then verifies on each peer that:

    * a single search equals the same search wrapped in a one-element batch,
    * a single search equals the result captured from the first peer,
    * a batch of four identical searches returns four identical results,
    * a batch of three different searches equals the three individual searches.

    Args:
        tmp_path: pytest-provided temporary directory in which the peer
            folders are created.
    """
    assert_project_root()
    peer_dirs = make_peer_folders(tmp_path, N_PEERS)

    # Start bootstrap peer; its REST API uri is the first one we gather
    bootstrap_api_uri, bootstrap_uri = start_first_peer(
        peer_dirs[0], "peer_0_0.log")
    peer_api_uris = [bootstrap_api_uri]

    # Wait for leader
    leader = wait_peer_added(bootstrap_api_uri)

    # Start other peers
    for i, peer_dir in enumerate(peer_dirs[1:], start=1):
        peer_api_uris.append(
            start_peer(peer_dir, f"peer_0_{i}.log", bootstrap_uri))

    # Wait for cluster
    wait_for_uniform_cluster_status(peer_api_uris, leader)

    # Check that there are no collections on all peers
    for uri in peer_api_uris:
        r_collections = requests.get(f"{uri}/collections")
        assert_http_ok(r_collections)
        assert len(r_collections.json()["result"]["collections"]) == 0

    # Create collection in first peer
    r_create = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection", json={
            "vectors": {
                "size": 4,
                "distance": "Dot"
            },
            "shard_number": N_SHARDS,
            "replication_factor": N_REPLICA,
        })
    assert_http_ok(r_create)

    # Check that it exists on all peers
    wait_collection_exists_and_active_on_all_peers(
        collection_name="test_collection", peer_api_uris=peer_api_uris)

    # Check collection's cluster info
    collection_cluster_info = get_collection_cluster_info(
        peer_api_uris[0], "test_collection")
    assert collection_cluster_info["shard_count"] == N_SHARDS

    # Create points in first peer's collection
    r_upsert = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection/points?wait=true", json={
            "points": [
                {
                    "id": 1,
                    "vector": [0.05, 0.61, 0.76, 0.74],
                    "payload": {
                        "city": "Berlin",
                        "country": "Germany",
                        "count": 1000000,
                        "square": 12.5,
                        "coords": {"lat": 1.0, "lon": 2.0}
                    }
                },
                {"id": 2, "vector": [0.19, 0.81, 0.75, 0.11],
                 "payload": {"city": ["Berlin", "London"]}},
                {"id": 3, "vector": [0.36, 0.55, 0.47, 0.94],
                 "payload": {"city": ["Berlin", "Moscow"]}},
                {"id": 4, "vector": [0.18, 0.01, 0.85, 0.80],
                 "payload": {"city": ["London", "Moscow"]}},
                {"id": 5, "vector": [0.24, 0.18, 0.22, 0.44],
                 "payload": {"count": [0]}},
                {"id": 6, "vector": [0.35, 0.08, 0.11, 0.44]},
                {"id": 7, "vector": [0.45, 0.07, 0.21, 0.04]},
                {"id": 8, "vector": [0.75, 0.18, 0.91, 0.48]},
                {"id": 9, "vector": [0.30, 0.01, 0.1, 0.12]},
                {"id": 10, "vector": [0.95, 0.8, 0.17, 0.19]}
            ]
        })
    assert_http_ok(r_upsert)

    # Paths of the two endpoints under test, relative to a peer's REST uri
    search_path = "/collections/test_collection/points/search"
    batch_path = "/collections/test_collection/points/search/batch"

    # Check that 'search' & `search_batch` return the same results on all peers
    q = {
        "vector": [0.2, 0.1, 0.9, 0.7],
        "top": 3,
        "offset": 1,
        "with_vector": True,
        "with_payload": True,
        "score_threshold": 0.1
    }

    # Capture result from first peer (HTTP status is asserted by the helper)
    r_init_search = _post_for_result(f"{peer_api_uris[0]}{search_path}", q)

    for uri in peer_api_uris:
        search_result = _post_for_result(f"{uri}{search_path}", q)
        batch_result = _post_for_result(
            f"{uri}{batch_path}", {"searches": [q]})

        # only one search in the batch
        assert len(batch_result) == 1
        # assert same number of results
        assert len(search_result) == len(batch_result[0])
        # assert stable across peers
        assert search_result == r_init_search
        # search equivalent to single batch
        assert search_result == batch_result[0]

    # Check that `search_batch` return the same results on all peers for duplicated searches
    n_duplicates = 4
    for uri in peer_api_uris:
        batch_result = _post_for_result(
            f"{uri}{batch_path}", {"searches": [q] * n_duplicates})

        # assert num searches
        assert len(batch_result) == n_duplicates
        # assert the search limit
        for hits in batch_result:
            assert len(hits) == q["top"]
        assert batch_result == [r_init_search] * n_duplicates

    # Check that `search_batch` return the same results on all peers compared to multiple searches
    q1 = {
        "vector": [0.2, 0.1, 0.9, 0.7],
        "top": 3,
        "offset": 1,
        "with_vector": True
    }
    q2 = {
        "vector": [0.1, 0.2, 0.9, 0.7],
        "top": 5,
        "offset": 3,
        "with_payload": True,
    }
    q3 = {
        "vector": [0.2, 0.1, 0.7, 0.9],
        "top": 10,
        "score_threshold": 1.1
    }
    queries = [q1, q2, q3]

    for uri in peer_api_uris:
        batch_result = _post_for_result(
            f"{uri}{batch_path}", {"searches": queries})
        # Run the same queries one by one, in the same order as in the batch
        accumulated = [
            _post_for_result(f"{uri}{search_path}", query)
            for query in queries
        ]
        assert accumulated == batch_result