import pathlib from .utils import * from .assertions import assert_http_ok N_PEERS = 2 N_SHARDS = 2 N_REPLICA = 1 def test_collection_shard_transfer(tmp_path: pathlib.Path): assert_project_root() peer_dirs = make_peer_folders(tmp_path, N_PEERS) # Gathers REST API uris peer_api_uris = [] # Start bootstrap (bootstrap_api_uri, bootstrap_uri) = start_first_peer( peer_dirs[0], "peer_0_0.log") peer_api_uris.append(bootstrap_api_uri) # Wait for leader leader = wait_peer_added(bootstrap_api_uri) # Start other peers for i in range(1, len(peer_dirs)): peer_api_uris.append(start_peer( peer_dirs[i], f"peer_0_{i}.log", bootstrap_uri)) # Wait for cluster wait_for_uniform_cluster_status(peer_api_uris, leader) # Check that there are no collections on all peers for uri in peer_api_uris: r = requests.get(f"{uri}/collections") assert_http_ok(r) assert len(r.json()["result"]["collections"]) == 0 # Create collection in first peer r = requests.put( f"{peer_api_uris[0]}/collections/test_collection", json={ "vectors": { "size": 4, "distance": "Dot" }, "shard_number": N_SHARDS, "replication_factor": N_REPLICA, }) assert_http_ok(r) # Check that it exists on all peers wait_collection_exists_and_active_on_all_peers(collection_name="test_collection", peer_api_uris=peer_api_uris) # Check collection's cluster info collection_cluster_info = get_collection_cluster_info(peer_api_uris[0], "test_collection") assert collection_cluster_info["shard_count"] == N_SHARDS # Create points in first peer's collection r = requests.put( f"{peer_api_uris[0]}/collections/test_collection/points?wait=true", json={ "points": [ { "id": 1, "vector": [0.05, 0.61, 0.76, 0.74], "payload": { "city": "Berlin", "country": "Germany", "count": 1000000, "square": 12.5, "coords": {"lat": 1.0, "lon": 2.0} } }, {"id": 2, "vector": [0.19, 0.81, 0.75, 0.11], "payload": {"city": ["Berlin", "London"]}}, {"id": 3, "vector": [0.36, 0.55, 0.47, 0.94], "payload": {"city": ["Berlin", "Moscow"]}}, {"id": 4, "vector": [0.18, 0.01, 0.85, 0.80], "payload": {"city": ["London", "Moscow"]}}, {"id": 5, "vector": [0.24, 0.18, 0.22, 0.44], "payload": {"count": [0]}}, {"id": 6, "vector": [0.35, 0.08, 0.11, 0.44]} ] }) assert_http_ok(r) # Check that 'search' returns the same results on all peers for uri in peer_api_uris: r = requests.post( f"{uri}/collections/test_collection/points/search", json={ "vector": [0.2, 0.1, 0.9, 0.7], "top": 3, } ) assert_http_ok(r) assert r.json()["result"][0]["id"] == 4 assert r.json()["result"][1]["id"] == 1 assert r.json()["result"][2]["id"] == 3 # Extract current collection cluster info collection_cluster_info = get_collection_cluster_info(peer_api_uris[0], "test_collection") target_peer_id = collection_cluster_info["remote_shards"][0]["peer_id"] source_uri = peer_api_uris[0] target_uri = peer_api_uris[1] target_collection_cluster_info = get_collection_cluster_info(target_uri, "test_collection") target_before_local_shard_count = len(target_collection_cluster_info["local_shards"]) before_local_shard_count = len(collection_cluster_info["local_shards"]) shard_id = collection_cluster_info["local_shards"][0]["shard_id"] source_peer_id = collection_cluster_info["peer_id"] # Test that we cannot move shard to ourselves r = requests.post( f"{source_uri}/collections/test_collection/cluster", json={ "move_shard": { "shard_id": shard_id, "from_peer_id": source_peer_id, "to_peer_id": source_peer_id } }) assert not r.ok assert r.status_code == 422 assert r.json()["status"]["error"].__contains__("Validation error in JSON body: [move_shard.to_peer_id: cannot transfer shard to itself") # Move shard `shard_id` to peer `target_peer_id` r = requests.post( f"{source_uri}/collections/test_collection/cluster", json={ "move_shard": { "shard_id": shard_id, "from_peer_id": source_peer_id, "to_peer_id": target_peer_id } }) assert_http_ok(r) # Wait for end of shard transfer wait_for_collection_shard_transfers_count(source_uri, "test_collection", 0) # Check the number of local shards goes down by 1 assert check_collection_local_shards_count(source_uri, "test_collection", before_local_shard_count - 1) assert check_collection_local_shards_count(target_uri, "test_collection", target_before_local_shard_count + 1) # Check that 'search' returns the same results on all peers for uri in peer_api_uris: r = requests.post( f"{uri}/collections/test_collection/points/search", json={ "vector": [0.2, 0.1, 0.9, 0.7], "top": 3, } ) assert_http_ok(r) assert r.json()["result"][0]["id"] == 4 assert r.json()["result"][1]["id"] == 1 assert r.json()["result"][2]["id"] == 3 # Replicate shards back to the source peer r = requests.post( f"{source_uri}/collections/test_collection/cluster", json={ "replicate_shard": { "shard_id": shard_id, "from_peer_id": target_peer_id, "to_peer_id": source_peer_id } }) assert_http_ok(r) # Wait for end of shard transfer wait_for_collection_shard_transfers_count(source_uri, "test_collection", 0) # Check that the number of local shard goes back to the original value assert check_collection_local_shards_count(source_uri, "test_collection", before_local_shard_count) assert check_collection_local_shards_count(target_uri, "test_collection", target_before_local_shard_count + 1) # Check that 'search' returns the same results on all peers for uri in peer_api_uris: r = requests.post( f"{uri}/collections/test_collection/points/search", json={ "vector": [0.2, 0.1, 0.9, 0.7], "top": 3, } ) assert_http_ok(r) assert r.json()["result"][0]["id"] == 4 assert r.json()["result"][1]["id"] == 1 assert r.json()["result"][2]["id"] == 3 # Perform a replication for the second time with the target node active r = requests.post( f"{source_uri}/collections/test_collection/cluster", json={ "replicate_shard": { "shard_id": shard_id, "from_peer_id": target_peer_id, "to_peer_id": source_peer_id } }) assert_http_ok(r) # Wait for end of shard transfer wait_for_collection_shard_transfers_count(source_uri, "test_collection", 0) # Check that the number of local shard is still the same assert check_collection_local_shards_count(source_uri, "test_collection", before_local_shard_count) assert check_collection_local_shards_count(target_uri, "test_collection", target_before_local_shard_count + 1) # Check that 'search' returns the same results on all peers for uri in peer_api_uris: r = requests.post( f"{uri}/collections/test_collection/points/search", json={ "vector": [0.2, 0.1, 0.9, 0.7], "top": 3, } ) assert_http_ok(r) assert r.json()["result"][0]["id"] == 4 assert r.json()["result"][1]["id"] == 1 assert r.json()["result"][2]["id"] == 3