import pathlib

from .fixtures import upsert_random_points, create_collection, create_field_index
from .utils import *

COLLECTION_NAME = "test_collection"

# Test that the maximum segment size property is respected
#
# It sets a very low limit and inserts some points. Qdrant should dynamically
# create a few new appendable segments for all the data to fit.
#
# It also confirms payload indices are configured properly and all payload
# fields are indexed.
def test_max_segment_size(tmp_path: pathlib.Path):
    assert_project_root()

    # Cap segments at 1 kB, keep the default segment number at 2, and disable
    # indexing so inserted points must overflow into new appendable segments
    env = {
        "QDRANT__STORAGE__OPTIMIZERS__DEFAULT_SEGMENT_NUMBER": "2",
        "QDRANT__STORAGE__OPTIMIZERS__MAX_SEGMENT_SIZE_KB": "1",
        "QDRANT__STORAGE__OPTIMIZERS__INDEXING_THRESHOLD_KB": "0",
    }
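
    # Start a single-peer cluster with the optimizer overrides applied through
    # environment variables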
    peer_api_uris, _peer_dirs, _bootstrap_uri = start_cluster(tmp_path, 1, 20000, extra_env=env)
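
    # A single shard with replication factor 1 keeps all segments on one peer,
    # which makes the segment counts asserted below deterministic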
    create_collection(peer_api_uris[0], shard_number=1, replication_factor=1)
    wait_collection_exists_and_active_on_all_peers(
        collection_name=COLLECTION_NAME,
        peer_api_uris=peer_api_uris,
    )
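
    # Index the "city" payload field; the collection info reports how many
    # points each payload index covers under payload_schema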
    create_field_index(peer_api_uris[0], COLLECTION_NAME, "city", "keyword")
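
    # Before any upserts: exactly the two default segments exist and the new
    # payload index covers no points yet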
    collection_info_before = get_collection_info(peer_api_uris[0], COLLECTION_NAME)
    assert collection_info_before["segments_count"] == 2
    assert collection_info_before["payload_schema"]["city"]["points"] == 0
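
    # 20 points far exceed the 1 kB segment cap, so Qdrant should create new
    # appendable segments to hold them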
    upsert_random_points(peer_api_uris[0], 20, batch_size=5)

    collection_info_after = get_collection_info(peer_api_uris[0], COLLECTION_NAME)

    # The number of segments must have grown because of the limited segment size
    assert collection_info_before["segments_count"] < collection_info_after["segments_count"]

    # All 20 points must be covered by the payload index
    assert collection_info_after["payload_schema"]["city"]["points"] == 20