import io
import json
import pathlib
import shutil
from time import sleep
from typing import Any, Callable

import pytest
import requests

from consensus_tests.fixtures import create_collection, upsert_random_points, drop_collection
from .utils import *

N_PEERS = 3
N_REPLICA = 2
N_SHARDS = 3
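
# Several tests below read and patch `raft_state.json` directly on disk. As a reference, here is
# a minimal sketch of the fields these tests touch (illustrative only: the real file contains
# more state, and the example values are made up):
#
#   {
#       "this_peer_id": 1234567890,
#       "first_voter": 1234567890,
#       "peer_address_by_id": {"1234567890": "http://127.0.0.1:6335"},
#       ...
#   }
#
# `this_peer_id` is the local peer's ID, `first_voter` records the origin peer used to bootstrap
# consensus, and `peer_address_by_id` maps peer IDs to their p2p URIs.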


def test_rejoin_cluster(tmp_path: pathlib.Path):
    assert_project_root()

    # Start cluster
    peer_api_uris, peer_dirs, bootstrap_uri = start_cluster(tmp_path, N_PEERS, port_seed=10000)

    create_collection(peer_api_uris[0], shard_number=N_SHARDS, replication_factor=N_REPLICA)
    wait_collection_exists_and_active_on_all_peers(collection_name="test_collection", peer_api_uris=peer_api_uris)
    upsert_random_points(peer_api_uris[0], 100)

    # Stop last node
    p = processes.pop()
    p.kill()

    # Validate upsert works with the dead node
    upsert_random_points(peer_api_uris[0], 100)

    # Assert that there are dead replicas
    wait_for_some_replicas_not_active(peer_api_uris[0], "test_collection")

    # Repeatedly drop, re-create collection and add data to it to accumulate Raft log entries
    for i in range(0, 2):
        print(f"creating collection {i}")

        # Drop test_collection
        drop_collection(peer_api_uris[0], "test_collection", timeout=5)

        # Re-create test_collection
        create_collection(peer_api_uris[0], shard_number=N_SHARDS, replication_factor=N_REPLICA, timeout=3)

        # Collection might not be ready yet, we don't care
        upsert_random_points(peer_api_uris[0], 100)
        print(f"before recovery end {i}")

        res = requests.get(f"{peer_api_uris[1]}/collections")
        print(res.json())

    # Create new collection unknown to the dead node
    create_collection(
        peer_api_uris[0],
        "test_collection2",
        shard_number=N_SHARDS,
        replication_factor=N_REPLICA,
        timeout=3
    )

    # Restart last node
    new_url = start_peer(peer_dirs[-1], "peer_0_restarted.log", bootstrap_uri, port=20000)
    peer_api_uris[-1] = new_url

    # Wait for restarted node to be up and ready
    wait_all_peers_up([new_url])

    # Repeatedly drop, re-create collection and add data to it to accumulate Raft log entries
    for i in range(0, 5):
        print(f"after recovery start {i}")

        # Drop test_collection
        drop_collection(peer_api_uris[0], "test_collection", timeout=5)

        # Re-create test_collection
        create_collection(peer_api_uris[0], shard_number=N_SHARDS, replication_factor=N_REPLICA, timeout=3)

        upsert_random_points(peer_api_uris[0], 500, fail_on_error=False)
        print(f"after recovery end {i}")

        res = requests.get(f"{new_url}/collections")
        print(res.json())

    wait_for_all_replicas_active(peer_api_uris[0], "test_collection2")

    # Assert that the restarted node has recovered the new collection
    wait_for_all_replicas_active(new_url, "test_collection2")


def test_rejoin_origin_from_wal(tmp_path: pathlib.Path):
    """
    This test checks that origin peer (first peer of the cluster) commits its own peer ID to consensus.

    - remove origin peer from cluster
    - modify second peer's `raft_state.json`, so that it does *not* provide origin peer ID and URL
      when bootstrapping new peer
    - add new peer to the cluster (bootstrapping from second peer), and check that it has valid
      state after it syncs with consensus
    - if new peer has valid state at the end of the test, it means it received correct origin peer
      ID and URL from consensus
    """

    # Overwrite `first_voter` peer
    def overwrite_first_voter(state: dict[str, Any], _: Any):
        state["first_voter"] = state["this_peer_id"]
        return state

    rejoin_cluster_test(tmp_path, start_cluster, overwrite_first_voter)


def test_rejoin_origin_from_state(tmp_path: pathlib.Path):
    """
    This test checks that Qdrant persists origin peer ID (`first_voter` field in `raft_state.json`)
    and propagates fake origin peer URL when bootstrapping new peer.

    - start cluster using *preconfigured* origin peer that does *not* have origin peer ID and URL
      committed to consensus
    - remove origin peer from cluster
    - assert that second peer's `raft_state.json` contains valid origin peer ID
    - add new peer to the cluster (bootstrapping from second peer), and check that it has valid
      state after it syncs with consensus
    - if new peer has valid state at the end of the test, it means it received correct origin peer
      ID and (fake) URL from second peer during bootstrap
    """

    # Assert origin peer ID is persisted as `first_voter`
    def assert_first_voter(state: dict[str, Any], origin_peer_id: int):
        assert state["first_voter"] == origin_peer_id

    rejoin_cluster_test(tmp_path, start_preconfigured_cluster, assert_first_voter)


@pytest.mark.skip("this test simulates and asserts past, incorrect behavior")
def test_rejoin_no_origin(tmp_path: pathlib.Path):
    """
    This test checks that `rejoin_cluster_test` is sufficient to reproduce the "missing origin peer" bug.

    It simulates *earlier* behavior of Qdrant (bypassing all fixes to commit/persist/recover origin
    peer ID/URL), and then checks that new peer added to such cluster has *invalid* state.

    This test is disabled by default, but it's useful to "test the tests" and reproduce original bug.
    """

    # Overwrite `first_voter` peer
    def overwrite_first_voter(state: dict[str, Any], _: Any):
        state["first_voter"] = 1337
        return state

    rejoin_cluster_test(tmp_path, start_preconfigured_cluster, overwrite_first_voter, expected_shards=2)


def test_rejoin_recover_origin(tmp_path: pathlib.Path):
    """
    This test checks that Qdrant recovers origin peer ID from WAL, if origin peer was not yet
    removed from the cluster.
    """

    collection = "test_collection"
    peers = 3
    shards = 3

    # Start cluster
    peer_uris, peer_dirs, bootstrap_uri = start_preconfigured_cluster(tmp_path, peers)

    # Get origin peer ID
    origin_peer_id = get_cluster_info(peer_uris[0])["peer_id"]

    # Wait a few seconds for consensus to catch up
    sleep(5)

    # Kill second peer
    second_peer = processes.pop(1)
    second_peer.kill()

    # Remove `first_voter` from `raft_state.json`
    with open(f"{peer_dirs[1]}/storage/raft_state.json", "r+") as file:
        state = json.load(file)
        del state["first_voter"]

        file.seek(0, io.SEEK_SET)
        file.truncate()
        json.dump(state, file)

    # Restart second peer with the same URI and ports
    second_peer_uri, bootstrap_uri = start_first_peer(peer_dirs[1], "peer_0_1_restarted.log", second_peer.p2p_port)
    wait_for_peer_online(second_peer_uri)

    # Assert second peer recovered `first_voter` from WAL
    with open(f"{peer_dirs[1]}/storage/raft_state.json", "r") as file:
        state = json.load(file)
        assert state["first_voter"] == origin_peer_id

    # Create collection, move all shards from first peer, remove first peer from cluster
    create_collection(peer_uris[0], collection, shards, 1)
    move_all_shards_from_peer(peer_uris[0], collection)
    remove_peer(peer_uris[0])
    processes.pop(0).kill()

    # Wait a few seconds for a new leader
    sleep(5)

    # Add new peer to cluster
    new_peer_uri, new_peer_dir = add_new_peer(tmp_path, peers, bootstrap_uri, collection)

    # Assert that the new peer observes the expected number of remote shards
    info = get_collection_cluster_info(new_peer_uri, collection)
    assert len(info["remote_shards"]) == shards


def rejoin_cluster_test(
    tmp_path: pathlib.Path,
    start_cluster: Callable[[pathlib.Path, int], tuple[list[str], list[pathlib.Path], str]],
    raft_state: Callable[[dict[str, Any], int], Any | None],
    collection: str = "test_collection",
    peers: int = 3,
    shards: int = 3,
    expected_shards: int = 3,
):
    """
    Parameterized test body that exercises adding a new peer after the origin peer was removed
    from the cluster.

    See: <https://github.com/qdrant/qdrant/issues/5138>
    """

    # Start cluster
    peer_uris, peer_dirs, bootstrap_uri = start_cluster(tmp_path, peers)

    # Get origin peer ID
    origin_peer_id = get_cluster_info(peer_uris[0])["peer_id"]

    # Create collection, move all shards from first peer, remove first peer from cluster
    create_collection(peer_uris[0], collection, shards, 1)
    move_all_shards_from_peer(peer_uris[0], collection)
    remove_peer(peer_uris[0])
    processes.pop(0).kill()

    # Generally, we could use *any* (second/third/random/last/etc) peer to bootstrap the new peer from,
    # but using the second peer allows us to (trivially) catch a single additional corner case in how we
    # initialize consensus state when bootstrapping a new peer.

    # Kill second peer
    second_peer = processes.pop(0)
    second_peer.kill()

    # Check/modify second peer's `raft_state.json`
    with open(f"{peer_dirs[1]}/storage/raft_state.json", "r+") as file:
        state = json.load(file)

        if new_state := raft_state(state, origin_peer_id):
            file.seek(0, io.SEEK_SET)
            file.truncate()
            json.dump(new_state, file)

    # Restart second peer with the same URI and ports
    second_peer_uri, bootstrap_uri = start_first_peer(peer_dirs[1], "peer_0_1_restarted.log", second_peer.p2p_port)
    wait_for_peer_online(second_peer_uri)

    # Add new peer to cluster
    new_peer_uri, new_peer_dir = add_new_peer(tmp_path, peers, bootstrap_uri, collection)

    # Assert that the new peer observes the expected number of remote shards
    info = get_collection_cluster_info(new_peer_uri, collection)
    assert len(info["remote_shards"]) == expected_shards


def start_preconfigured_cluster(tmp_path: pathlib.Path, peers: int = 3):
    assert_project_root()

    # Collect peer URIs
    peer_uris = []

    # Create peer directories
    peer_dirs = make_peer_folders(tmp_path, peers)

    # Copy first peer Raft state and WAL from `test_cluster_rejoin_data`.
    #
    # It's just an "empty" peer, but its peer ID is *not* committed into WAL. We can use this peer to
    # test that first peer ID is correctly recovered/propagated, even when it's not committed into WAL.
    shutil.copytree("tests/consensus_tests/test_cluster_rejoin_data", f"{peer_dirs[0]}/storage")

    # Modify peer URI in Raft state to prevent URI change on startup 🙄
    p2p_port = get_port()
    grpc_port = get_port()
    http_port = get_port()

    with open(f"{peer_dirs[0]}/storage/raft_state.json", "r+") as file:
        state = json.load(file)
        state["peer_address_by_id"][str(state["this_peer_id"])] = f"http://127.0.0.1:{p2p_port}"

        file.seek(0, io.SEEK_SET)
        file.truncate()
        json.dump(state, file)

    # Start first peer
    first_peer_uri, bootstrap_uri = start_first_peer(peer_dirs[0], "peer_0_0.log", p2p_port)
    peer_uris.append(first_peer_uri)

    wait_for_peer_online(first_peer_uri)

    # Bootstrap other peers
    for peer_idx in range(1, peers):
        peer_uri = start_peer(peer_dirs[peer_idx], f"peer_0_{peer_idx}.log", bootstrap_uri)
        peer_uris.append(peer_uri)

    wait_all_peers_up(peer_uris)

    return peer_uris, peer_dirs, bootstrap_uri


def move_all_shards_from_peer(peer_uri: str, collection: str = "test_collection") -> tuple[int, int]:
    """
    Moves all shards from peer at `peer_uri` to another (random) peer in the cluster.
    """

    # Find peer to move shards to
    info = get_cluster_info(peer_uri)

    current_peer_id = info["peer_id"]
    other_peer_id = None

    for peer_id, info in info["peers"].items():
        peer_id = int(peer_id)

        if peer_id != current_peer_id:
            other_peer_id = peer_id
            break

    assert other_peer_id

    # Move all shards from first peer to second peer
    info = get_collection_cluster_info(peer_uri, collection)

    for shard in info["local_shards"]:
        resp = requests.post(f"{peer_uri}/collections/{collection}/cluster", json={
            "move_shard": {
                "from_peer_id": current_peer_id,
                "to_peer_id": other_peer_id,
                "shard_id": shard["shard_id"],
            }
        })

        assert_http_ok(resp)

    # Wait until all transfers finished
    wait_for_collection_shard_transfers_count(peer_uri, collection, 0)

    return current_peer_id, other_peer_id
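
# For reference, a rough sketch of the cluster info responses the helpers above rely on. This
# lists only the keys actually accessed in this file and is not a complete schema (the shapes
# and example values are assumptions):
#
#   get_cluster_info(peer_uri)
#       -> {"peer_id": 123, "peers": {"456": {...}, ...}, ...}
#
#   get_collection_cluster_info(peer_uri, collection)
#       -> {"local_shards": [{"shard_id": 0, ...}, ...], "remote_shards": [...], ...}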


def remove_peer(peer_uri: str, peer_id: int | None = None):
    if peer_id is None:
        info = get_cluster_info(peer_uri)
        peer_id = info["peer_id"]

    resp = requests.delete(f"{peer_uri}/cluster/peer/{peer_id}")
    assert_http_ok(resp)


def add_new_peer(tmp_path: pathlib.Path, peer_idx: int, bootstrap_uri: str, collection: str | None = None):
    peer_dir = make_peer_folder(tmp_path, peer_idx)
    peer_uri = start_peer(peer_dir, f"peer_0_{peer_idx}.log", bootstrap_uri)

    wait_for_peer_online(peer_uri)

    if collection is not None:
        wait_collection_on_all_peers(collection, [peer_uri])

    return peer_uri, peer_dir
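
# These tests are meant to be run from the project root (checked by `assert_project_root()` and
# implied by the relative `tests/consensus_tests/...` path above), typically via pytest, e.g.:
#
#   pytest tests/consensus_tests/ -k rejoin -x -s
#
# The exact pytest flags and the `-k rejoin` filter are illustrative, not prescribed by this file.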