File size: 1,957 Bytes
3932407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import multiprocessing
import pathlib
import random
from time import sleep

from .fixtures import upsert_random_points, create_collection, create_field_index
from .utils import *

# Collection name passed to the wait, index-creation and info helpers in this
# module. NOTE(review): create_collection() and upsert_random_points() are
# called without a collection name, so this presumably matches the fixtures'
# default name -- confirm against .fixtures.
COLLECTION_NAME = "test_collection"


# Test that the maximum segment size property is respected
#
# It sets a very low limit and inserts some points. Qdrant should dynamically
# create a few new appendable segments for all the data to fit.
#
# It also confirms payload indices are configured properly and all payload
# fields are indexed.
def test_max_segment_size(tmp_path: pathlib.Path):
    """Check that a tiny segment size cap makes the segment count grow.

    Starts a single-peer cluster with optimizer overrides, creates a
    collection with a keyword index on ``city``, upserts 20 random points
    and then compares the collection info taken before and after the upsert:
    the segment count must have increased and the ``city`` index must
    report all 20 points.
    """
    assert_project_root()

    # Optimizer overrides handed to the peer process: two default segments,
    # a 1 KB maximum segment size and an indexing threshold of 0 KB.
    optimizer_env = {
        "QDRANT__STORAGE__OPTIMIZERS__DEFAULT_SEGMENT_NUMBER": "2",
        "QDRANT__STORAGE__OPTIMIZERS__MAX_SEGMENT_SIZE_KB": "1",
        "QDRANT__STORAGE__OPTIMIZERS__INDEXING_THRESHOLD_KB": "0",
    }

    api_uris, _dirs, _bootstrap = start_cluster(
        tmp_path, 1, 20000, extra_env=optimizer_env
    )
    # Only one peer is started, so every request goes to the first URI.
    peer_uri = api_uris[0]
    total_points = 20

    create_collection(peer_uri, shard_number=1, replication_factor=1)
    wait_collection_exists_and_active_on_all_peers(
        collection_name=COLLECTION_NAME,
        peer_api_uris=api_uris,
    )
    create_field_index(peer_uri, COLLECTION_NAME, "city", "keyword")

    # Baseline: empty collection with the configured two segments.
    info_before = get_collection_info(peer_uri, COLLECTION_NAME)
    segments_before = info_before["segments_count"]
    assert segments_before == 2
    assert info_before["payload_schema"]["city"]["points"] == 0

    upsert_random_points(peer_uri, total_points, batch_size=5)

    info_after = get_collection_info(peer_uri, COLLECTION_NAME)

    # The limited segment size must have forced additional segments.
    assert segments_before < info_after["segments_count"]

    # Every inserted point must be covered by the payload index.
    assert info_after["payload_schema"]["city"]["points"] == total_points