import pathlib
from pathlib import Path

import pytest
import requests
from requests import get, post, put, delete

from .fixtures import create_collection, upsert_random_points
from .utils import *

N_PEERS = 3
COLLECTION_NAME = "test_collection"
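
# These tests corrupt a segment on one peer, restart that peer with
# `QDRANT__STORAGE__HANDLE_COLLECTION_LOAD_ERRORS` enabled, and check how reads,
# writes, snapshots, and collection deletion behave under different shard and
# replication configurations.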


def test_dummy_shard_all_reads_and_writes_succeed(tmp_path: pathlib.Path):
    peer_url = start_cluster_with_corrupted_node(N_PEERS, 2, 1, tmp_path)

    read_requests(peer_url, 200)
    write_requests(peer_url, 200, 200)

    collection_snapshot_and_collection_delete(peer_url, check_failure=False)


def test_dummy_shard_all_reads_fail(tmp_path: pathlib.Path):
    peer_url = start_cluster_with_corrupted_node(N_PEERS, 1, 1, tmp_path)

    read_requests(peer_url, 500)
    collection_snapshot_and_collection_delete(peer_url)


# When the first write request fails, it marks the shard as "dead", and
# `write_consistency_factor` is always capped at the number of "alive" replicas.
#
# So, once the shard is marked as "dead", `write_consistency_factor` effectively
# becomes 2 instead of 3, and the following requests start to succeed...
# until the shard switches to the "partial" state. 🙈
#
# Even though we add some special handling for the `DummyShard`, the node still
# "flickers" into the "partial" state and back to "dead", which makes it hard
# to run this test reliably. :/
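#
# For illustration (numbers assumed from this test's setup, not asserted
# anywhere): with replication_factor = 3 and one replica dead, the effective
# factor is min(write_consistency_factor, alive_replicas) = min(3, 2) = 2, so
# writes acknowledged by the two healthy replicas are reported as successful.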
@pytest.mark.skip(reason="hard to test reliably")
def test_dummy_shard_only_first_write_fails(tmp_path: pathlib.Path):
    peer_url = start_cluster_with_corrupted_node(1, N_PEERS, N_PEERS, tmp_path)

    write_requests(peer_url, 500, 200)


def start_cluster_with_corrupted_node(
    shard_number, replication_factor, write_consistency_factor, tmp_path
):
    assert_project_root()

    peer_urls, peer_dirs, bootstrap_url = start_cluster(tmp_path, N_PEERS)

    create_collection(
        peer_urls[0],
        shard_number=shard_number,
        replication_factor=replication_factor,
        write_consistency_factor=write_consistency_factor,
    )

    wait_collection_exists_and_active_on_all_peers(
        collection_name=COLLECTION_NAME,
        peer_api_uris=peer_urls,
    )

    upsert_random_points(peer_urls[0], 100)

    # Kill the last peer
    processes.pop().kill()

    # Find a local shard inside the collection
    collection_path = Path(peer_dirs[-1]) / "storage" / "collections" / COLLECTION_NAME
    segments_path = next(filter(
        lambda segments: segments.exists(),
        map(lambda shard: shard / "segments", collection_path.iterdir()),
    ))

    # Find a segment inside a local shard
    segment_path = next(filter(lambda path: path.is_dir(), segments_path.iterdir()))

    # Corrupt the `segment.json` file inside the segment (to trigger a collection load failure)
    segment_json_path = segment_path / "segment.json"
    with open(segment_json_path, "a") as segment_json_file:
        segment_json_file.write("borked")
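
    # Appending garbage leaves `segment.json` as invalid JSON, so deserializing
    # the segment (and therefore loading the collection) fails on startup. With
    # the flag below, the node is expected to start anyway, standing in a
    # `DummyShard` for the unloadable shard (see the tests above).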

    # Restart the peer
    peer_url = start_peer(peer_dirs[-1], "peer_0_restarted.log", bootstrap_url, extra_env={
        "QDRANT__STORAGE__HANDLE_COLLECTION_LOAD_ERRORS": "true"
    })

    wait_for_peer_online(peer_url)

    return peer_url


def read_requests(peer_url, expected_status):
    # Collection info
    resp = requests.get(base_url(peer_url))
    assert_http_response(resp, expected_status, "GET", f"collections/{COLLECTION_NAME}")

    TESTS = [
        (get, "points/1"),
        (post, "points", {
            "ids": [1, 2, 3],
        }),
        # TODO: Empty payload is *required* for `points/scroll`! :/
        (post, "points/scroll", {}),
        # TODO: Empty payload is *required* for `points/count`! :/
        (post, "points/count", {}),
        (post, "points/search", {
            "vector": [.1, .1, .1, .1],
            "limit": 10,
        }),
        (post, "points/search/batch", {
            "searches": [
                { "vector": [.1, .1, .1, .1], "limit": 10 },
                { "vector": [.2, .2, .2, .2], "limit": 10 },
            ]
        }),
        (post, "points/recommend", {
            "positive": [1, 2, 3],
            "limit": 10,
        }),
        (post, "points/recommend/batch", {
            "searches": [
                { "positive": [1, 2, 3], "limit": 10 },
                { "positive": [2, 3, 4], "limit": 10 },
            ]
        }),
    ]

    execute_requests(peer_url, expected_status, TESTS)


def write_requests(peer_url, first_request_expected_status, following_requests_expected_status):
    TESTS = [
        (put, "points?wait=true", {
            "points": [
                { "id": 6942, "payload": { "what": "ever" }, "vector": [.6, .9, .4, .2] },
            ]
        }),
        (put, "points?wait=true", {
            "batch": {
                "ids": [4269],
                "payloads": [{ "ever": "what" }],
                "vectors": [[.4, .2, .6, .9]],
            }
        }),
        (put, "points/payload?wait=true", {
            "points": [1, 2, 3],
            "payload": { "what": "ever" },
        }),
        (post, "points/payload?wait=true", {
            "points": [1, 2, 3],
            "payload": { "ever": "what" },
        }),
        (post, "points/payload/delete?wait=true", {
            "points": [1, 2, 3],
            "keys": ["city", "what"],
        }),
        (post, "points/payload/clear?wait=true", {
            "points": [1, 2, 3],
        }),
        (post, "points/delete?wait=true", {
            "points": [1, 2, 3],
        }),
        (put, "index", {
            "field_name": "city",
            "field_schema": "keyword",
        }),
        (delete, "index/city"),
    ]
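
    # The first write may get a different status than the rest (see the comment
    # above `test_dummy_shard_only_first_write_fails`), so it is checked with
    # its own expected status.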
    execute_requests(peer_url, first_request_expected_status, TESTS[:1])
    execute_requests(peer_url, following_requests_expected_status, TESTS[1:])


def collection_snapshot_and_collection_delete(peer_url, check_failure=True):
    if check_failure:
        # Create a collection snapshot.
        # Expect snapshot creation to fail, unless the shard was recovered from another replica.
        resp = requests.post(f"{base_url(peer_url)}/snapshots")
        assert_http_response(resp, 500, "POST", "snapshots")

    # Delete the collection. We expect this request to succeed in all cluster configurations.
    resp = requests.delete(base_url(peer_url))
    assert_http_response(resp, 200, "DELETE", f"collections/{COLLECTION_NAME}")


def base_url(peer_url):
    return f"{peer_url}/collections/{COLLECTION_NAME}"


def execute_requests(peer_url, expected_status, tests):
    for method, url, *payload in tests:
        resp = method(
            f"{base_url(peer_url)}/{url}",
            json=payload[0] if payload else None,
        )

        assert_http_response(resp, expected_status, method.__name__.upper(), url)


def assert_http_response(resp, expected_status, method, url):
    assert expected_status == resp.status_code, (
        f"`{method} {url}` "
        f"returned an unexpected response (expected {expected_status}, received {resp.status_code}): "
        f"{resp.json()}"
    )