import pathlib
from pathlib import Path

import pytest
import requests
from requests import get, post, put, delete

from .fixtures import create_collection, upsert_random_points
from .utils import *

N_PEERS = 3
COLLECTION_NAME = "test_collection"


def test_dummy_shard_all_reads_and_writes_succeed(tmp_path: pathlib.Path):
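    # shard_number=3, replication_factor=2, write_consistency_factor=1: every shard
    # on the corrupted peer still has a healthy replica elsewhere, so both reads
    # and writes are expected to succeed (HTTP 200).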
peer_url = start_cluster_with_corrupted_node(N_PEERS, 2, 1, tmp_path)
read_requests(peer_url, 200)
write_requests(peer_url, 200, 200)
    collection_snapshot_and_collection_delete(peer_url, check_failure=False)


def test_dummy_shard_all_reads_fail(tmp_path: pathlib.Path):
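    # shard_number=3, replication_factor=1: shards hosted on the corrupted peer
    # have no other replica, so reads are expected to fail with HTTP 500.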
peer_url = start_cluster_with_corrupted_node(N_PEERS, 1, 1, tmp_path)
read_requests(peer_url, 500)
    collection_snapshot_and_collection_delete(peer_url)


# When the first "write" request fails, it marks the shard as "dead".
# `write_consistency_factor` is always "capped" at the number of "alive" replicas.
#
# So, once the shard is marked as "dead", `write_consistency_factor` effectively
# becomes 2 instead of 3, and the following requests start to succeed...
# until the shard switches to the "partial" state. 🙈
#
# Even though we add some special handling for the `DummyShard`, the replica still
# "flickers" into the "partial" state and then back to the "dead" state, so it's
# kinda hard to run this test reliably. :/
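# (For example: with replication_factor=3, the effective factor is 3 while all
# three replicas are alive; once the corrupted replica is marked "dead", it is
# capped at 2, so writes acknowledged by the two healthy replicas go through.)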
@pytest.mark.skip(reason="hard to test reliably")
def test_dummy_shard_only_first_write_fails(tmp_path: pathlib.Path):
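    # shard_number=1, replication_factor=3, write_consistency_factor=3: only the
    # very first write is expected to fail (HTTP 500); once the corrupted replica
    # is marked "dead", the effective write consistency drops and later writes
    # return HTTP 200.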
peer_url = start_cluster_with_corrupted_node(1, N_PEERS, N_PEERS, tmp_path)
    write_requests(peer_url, 500, 200)


def start_cluster_with_corrupted_node(
    shard_number, replication_factor, write_consistency_factor, tmp_path
):
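    # Start a cluster of N_PEERS nodes, create the test collection, upsert some
    # points, then corrupt a local shard on the last peer and restart that peer
    # with QDRANT__STORAGE__HANDLE_COLLECTION_LOAD_ERRORS enabled, so the broken
    # shard is loaded as a dummy shard. Returns the URL of the restarted peer.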
assert_project_root()
peer_urls, peer_dirs, bootstrap_url = start_cluster(tmp_path, N_PEERS)
create_collection(
peer_urls[0],
shard_number=shard_number,
replication_factor=replication_factor,
write_consistency_factor=write_consistency_factor,
)
    wait_collection_exists_and_active_on_all_peers(
        collection_name=COLLECTION_NAME,
        peer_api_uris=peer_urls,
    )
upsert_random_points(peer_urls[0], 100)
# Kill the last peer
processes.pop().kill()
# Find a local shard inside the collection
    collection_path = Path(peer_dirs[-1]) / "storage" / "collections" / COLLECTION_NAME
    segments_path = next(filter(
        lambda segments: segments.exists(),
        map(lambda shard: shard / "segments", collection_path.iterdir()),
    ))
# Find a segment inside a local shard
segment_path = next(filter(lambda path: path.is_dir(), segments_path.iterdir()))
# Corrupt `segment.json` file inside a segment (to trigger collection load failure)
    segment_json_path = segment_path / "segment.json"
with open(segment_json_path, "a") as segment_json_file:
segment_json_file.write("borked")
# Restart the peer
peer_url = start_peer(peer_dirs[-1], "peer_0_restarted.log", bootstrap_url, extra_env={
"QDRANT__STORAGE__HANDLE_COLLECTION_LOAD_ERRORS": "true"
})
wait_for_peer_online(peer_url)
    return peer_url


def read_requests(peer_url, expected_status):
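    # Issue a set of read-only point requests against the collection and assert
    # that every one of them returns `expected_status`.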
# Collection info
resp = requests.get(base_url(peer_url))
assert_http_response(resp, expected_status, "GET", f"collections/{COLLECTION_NAME}")
TESTS = [
(get, "points/1"),
(post, "points", {
"ids": [1, 2, 3],
}),
# TODO: Empty payload is *required* for `points/scroll`! :/
(post, "points/scroll", {}),
# TODO: Empty payload is *required* for `points/count`! :/
(post, "points/count", {}),
(post, "points/search", {
"vector": [.1, .1, .1, .1],
"limit": 10,
}),
(post, "points/search/batch", {
"searches": [
{ "vector": [.1, .1, .1, .1], "limit": 10 },
{ "vector": [.2, .2, .2, .2], "limit": 10 },
]
}),
(post, "points/recommend", {
"positive": [1, 2, 3],
"limit": 10,
}),
(post, "points/recommend/batch", {
"searches": [
{ "positive": [1, 2, 3], "limit": 10 },
{ "positive": [2, 3, 4], "limit": 10 },
]
}),
]
    execute_requests(peer_url, expected_status, TESTS)


def write_requests(peer_url, first_request_expected_status, following_requests_expected_status):
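    # Issue a set of write requests. The first request has its own expected status,
    # since it is the one that can flip the corrupted shard into the "dead" state
    # (see the comment above `test_dummy_shard_only_first_write_fails`).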
TESTS = [
(put, "points?wait=true", {
"points": [
{ "id": 6942, "payload": { "what": "ever" }, "vector": [.6, .9, .4, .2] },
]
}),
(put, "points?wait=true", {
"batch": {
"ids": [4269],
"payloads": [{ "ever": "what" }],
"vectors": [[.4, .2, .6, .9]],
}
}),
(put, "points/payload?wait=true", {
"points": [1, 2, 3],
"payload": { "what": "ever" },
}),
(post, "points/payload?wait=true", {
"points": [1, 2, 3],
"payload": { "ever": "what" },
}),
(post, "points/payload/delete?wait=true", {
"points": [1, 2, 3],
"keys": ["city", "what"],
}),
(post, "points/payload/clear?wait=true", {
"points": [1, 2, 3],
}),
(post, "points/delete?wait=true", {
"points": [1, 2, 3],
}),
(put, "index", {
"field_name": "city",
"field_schema": "keyword",
}),
(delete, "index/city"),
]
execute_requests(peer_url, first_request_expected_status, TESTS[:1])
    execute_requests(peer_url, following_requests_expected_status, TESTS[1:])


def collection_snapshot_and_collection_delete(peer_url, check_failure=True):
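    # Collection-level operations: snapshot creation (optionally expected to fail)
    # followed by collection deletion (always expected to succeed).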
if check_failure:
        # Create a collection snapshot.
        # Snapshot creation is expected to fail unless the shard was recovered from
        # another replica (callers pass `check_failure=False` in that case).
resp = requests.post(f"{base_url(peer_url)}/snapshots")
assert_http_response(resp, 500, "POST", "snapshots")
# Delete collection. We expect this request to succeed in all cluster configurations.
resp = requests.delete(base_url(peer_url))
    assert_http_response(resp, 200, "DELETE", f"collections/{COLLECTION_NAME}")


def base_url(peer_url):
    return f"{peer_url}/collections/{COLLECTION_NAME}"


def execute_requests(peer_url, expected_status, tests):
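    # Each entry in `tests` is either a (method, url) or a (method, url, payload)
    # tuple; send each request and assert the expected HTTP status.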
for method, url, *payload in tests:
resp = method(
f"{base_url(peer_url)}/{url}",
json=payload[0] if payload else None,
)
        assert_http_response(resp, expected_status, method.__name__.upper(), url)


def assert_http_response(resp, expected_status, method, url):
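    # Include the request and the response body in the assertion message to make
    # failures easier to debug.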
assert expected_status == resp.status_code, \
f"`{method} {url}` "\
f"returned an unexpected response (expected {expected_status}, received {resp.status_code}): "\
f"{resp.json()}"