import pathlib

import requests

from .utils import *
from .assertions import assert_http_ok

N_PEERS = 2
N_SHARDS = 2
N_REPLICA = 1


def test_collection_shard_transfer(tmp_path: pathlib.Path):
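    """Exercise shard transfer between two peers.

    Start a two-peer cluster, create a collection with two shards and a
    replication factor of 1, move one shard from the source peer to the
    target peer, then replicate it back (twice), checking after each step
    that searches return identical results on every peer.
    """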
    assert_project_root()
    peer_dirs = make_peer_folders(tmp_path, N_PEERS)

    # Gather the REST API URIs of all peers
    peer_api_uris = []

    # Start the bootstrap peer
    (bootstrap_api_uri, bootstrap_uri) = start_first_peer(peer_dirs[0], "peer_0_0.log")
    peer_api_uris.append(bootstrap_api_uri)

    # Wait for the leader
    leader = wait_peer_added(bootstrap_api_uri)

    # Start the other peers
    for i in range(1, len(peer_dirs)):
        peer_api_uris.append(start_peer(peer_dirs[i], f"peer_0_{i}.log", bootstrap_uri))

    # Wait for the cluster to converge to a uniform status
    wait_for_uniform_cluster_status(peer_api_uris, leader)

    # Check that no collection exists yet on any peer
    for uri in peer_api_uris:
        r = requests.get(f"{uri}/collections")
        assert_http_ok(r)
        assert len(r.json()["result"]["collections"]) == 0

    # Create a collection on the first peer
    r = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection", json={
            "vectors": {
                "size": 4,
                "distance": "Dot"
            },
            "shard_number": N_SHARDS,
            "replication_factor": N_REPLICA,
        })
    assert_http_ok(r)

    # Check that the collection exists and is active on all peers
    wait_collection_exists_and_active_on_all_peers(collection_name="test_collection", peer_api_uris=peer_api_uris)

    # Check the collection's cluster info
    collection_cluster_info = get_collection_cluster_info(peer_api_uris[0], "test_collection")
    assert collection_cluster_info["shard_count"] == N_SHARDS

    # Create points in the first peer's collection
    r = requests.put(
        f"{peer_api_uris[0]}/collections/test_collection/points?wait=true", json={
            "points": [
                {
                    "id": 1,
                    "vector": [0.05, 0.61, 0.76, 0.74],
                    "payload": {
                        "city": "Berlin",
                        "country": "Germany",
                        "count": 1000000,
                        "square": 12.5,
                        "coords": {"lat": 1.0, "lon": 2.0}
                    }
                },
                {"id": 2, "vector": [0.19, 0.81, 0.75, 0.11], "payload": {"city": ["Berlin", "London"]}},
                {"id": 3, "vector": [0.36, 0.55, 0.47, 0.94], "payload": {"city": ["Berlin", "Moscow"]}},
                {"id": 4, "vector": [0.18, 0.01, 0.85, 0.80], "payload": {"city": ["London", "Moscow"]}},
                {"id": 5, "vector": [0.24, 0.18, 0.22, 0.44], "payload": {"count": [0]}},
                {"id": 6, "vector": [0.35, 0.08, 0.11, 0.44]}
            ]
        })
    assert_http_ok(r)

    # Check that 'search' returns the same results on all peers
    for uri in peer_api_uris:
        r = requests.post(
            f"{uri}/collections/test_collection/points/search", json={
                "vector": [0.2, 0.1, 0.9, 0.7],
                "top": 3,
            }
        )
        assert_http_ok(r)
        assert r.json()["result"][0]["id"] == 4
        assert r.json()["result"][1]["id"] == 1
        assert r.json()["result"][2]["id"] == 3
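
    # With two shards, two peers, and a replication factor of 1, each peer is
    # expected to host exactly one shard, so the first peer should see one
    # local shard and one remote shard in the cluster info queried below.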
    # Extract current collection cluster info
    collection_cluster_info = get_collection_cluster_info(peer_api_uris[0], "test_collection")
    target_peer_id = collection_cluster_info["remote_shards"][0]["peer_id"]

    source_uri = peer_api_uris[0]
    target_uri = peer_api_uris[1]

    target_collection_cluster_info = get_collection_cluster_info(target_uri, "test_collection")
    target_before_local_shard_count = len(target_collection_cluster_info["local_shards"])

    before_local_shard_count = len(collection_cluster_info["local_shards"])
    shard_id = collection_cluster_info["local_shards"][0]["shard_id"]
    source_peer_id = collection_cluster_info["peer_id"]

    # Check that a shard cannot be moved to the peer it is already on
    r = requests.post(
        f"{source_uri}/collections/test_collection/cluster", json={
            "move_shard": {
                "shard_id": shard_id,
                "from_peer_id": source_peer_id,
                "to_peer_id": source_peer_id
            }
        })
    assert not r.ok
    assert r.status_code == 422
    assert "Validation error in JSON body: [move_shard.to_peer_id: cannot transfer shard to itself" in r.json()["status"]["error"]
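
    # The rejected request should not have scheduled any transfer, so the move
    # below starts from the original shard layout.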

    # Move shard `shard_id` to peer `target_peer_id`
    r = requests.post(
        f"{source_uri}/collections/test_collection/cluster", json={
            "move_shard": {
                "shard_id": shard_id,
                "from_peer_id": source_peer_id,
                "to_peer_id": target_peer_id
            }
        })
    assert_http_ok(r)

    # Wait for the shard transfer to finish
    wait_for_collection_shard_transfers_count(source_uri, "test_collection", 0)

    # Check that the source lost one local shard and the target gained one
    assert check_collection_local_shards_count(source_uri, "test_collection", before_local_shard_count - 1)
    assert check_collection_local_shards_count(target_uri, "test_collection", target_before_local_shard_count + 1)
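
    # With a replication factor of 1 the moved shard now lives only on the
    # target peer, so searches sent through the source peer have to be
    # resolved against the target's copy.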
    # Check that 'search' still returns the same results on all peers
    for uri in peer_api_uris:
        r = requests.post(
            f"{uri}/collections/test_collection/points/search", json={
                "vector": [0.2, 0.1, 0.9, 0.7],
                "top": 3,
            }
        )
        assert_http_ok(r)
        assert r.json()["result"][0]["id"] == 4
        assert r.json()["result"][1]["id"] == 1
        assert r.json()["result"][2]["id"] == 3

    # Replicate the shard back to the source peer
    r = requests.post(
        f"{source_uri}/collections/test_collection/cluster", json={
            "replicate_shard": {
                "shard_id": shard_id,
                "from_peer_id": target_peer_id,
                "to_peer_id": source_peer_id
            }
        })
    assert_http_ok(r)

    # Wait for the shard transfer to finish
    wait_for_collection_shard_transfers_count(source_uri, "test_collection", 0)

    # Check that the source is back to its original number of local shards
    # while the target keeps its copy
    assert check_collection_local_shards_count(source_uri, "test_collection", before_local_shard_count)
    assert check_collection_local_shards_count(target_uri, "test_collection", target_before_local_shard_count + 1)
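
    # After 'replicate_shard' both peers hold a copy of the shard: the source
    # got a replica back while the target kept its own, as the counts above show.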
    # Check that 'search' returns the same results on all peers
    for uri in peer_api_uris:
        r = requests.post(
            f"{uri}/collections/test_collection/points/search", json={
                "vector": [0.2, 0.1, 0.9, 0.7],
                "top": 3,
            }
        )
        assert_http_ok(r)
        assert r.json()["result"][0]["id"] == 4
        assert r.json()["result"][1]["id"] == 1
        assert r.json()["result"][2]["id"] == 3

    # Replicate a second time, while the destination peer already has an active replica
    r = requests.post(
        f"{source_uri}/collections/test_collection/cluster", json={
            "replicate_shard": {
                "shard_id": shard_id,
                "from_peer_id": target_peer_id,
                "to_peer_id": source_peer_id
            }
        })
    assert_http_ok(r)

    # Wait for the shard transfer to finish
    wait_for_collection_shard_transfers_count(source_uri, "test_collection", 0)

    # Check that the number of local shards is unchanged on both peers
    assert check_collection_local_shards_count(source_uri, "test_collection", before_local_shard_count)
    assert check_collection_local_shards_count(target_uri, "test_collection", target_before_local_shard_count + 1)

    # Check that 'search' returns the same results on all peers
    for uri in peer_api_uris:
        r = requests.post(
            f"{uri}/collections/test_collection/points/search", json={
                "vector": [0.2, 0.1, 0.9, 0.7],
                "top": 3,
            }
        )
        assert_http_ok(r)
        assert r.json()["result"][0]["id"] == 4
        assert r.json()["result"][1]["id"] == 1
        assert r.json()["result"][2]["id"] == 3