File size: 4,221 Bytes
3932407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import random
from typing import List

import requests

from consensus_tests.assertions import assert_http_ok

CITIES = ["London", "New York", "Paris", "Tokyo", "Berlin", "Rome", "Madrid", "Moscow"]

# dense vector sizing
DENSE_VECTOR_SIZE = 4

# sparse vector sizing
SPARSE_VECTOR_SIZE = 1000
SPARSE_VECTOR_DENSITY = 0.1


def random_dense_vector():
    """Return a random dense vector: DENSE_VECTOR_SIZE floats drawn from [0, 1)."""
    components: List[float] = []
    for _ in range(DENSE_VECTOR_SIZE):
        components.append(random.random())
    return components


def random_sparse_vector():
    """Return a random sparse vector as a dict with "indices" and "values" lists.

    A fraction SPARSE_VECTOR_DENSITY of the SPARSE_VECTOR_SIZE dimensions is
    populated with distinct indices; each value is rounded to 6 decimal places.
    """
    non_zero_count = int(SPARSE_VECTOR_SIZE * SPARSE_VECTOR_DENSITY)
    chosen_indices: List[int] = random.sample(range(SPARSE_VECTOR_SIZE), non_zero_count)
    chosen_values: List[float] = [round(random.random(), 6) for _ in range(non_zero_count)]
    return {"indices": chosen_indices, "values": chosen_values}


def upsert_random_points(
    peer_url,
    num,
    collection_name="test_collection",
    fail_on_error=True,
    offset=0,
    batch_size=None,
    wait="true",
    ordering="weak",
    with_sparse_vector=True,
    shard_key=None,
    headers=None,
):
    """Upsert `num` randomly generated points into a collection over REST.

    Args:
        peer_url: base URL of the peer to write to.
        num: total number of points to upsert.
        collection_name: target collection.
        fail_on_error: when True, assert each batch responds with HTTP 200.
        offset: starting point id; ids are assigned sequentially from here.
        batch_size: max points per request; None sends everything in one request.
        wait: "true"/"false" query flag — wait for the operation to complete.
        ordering: write ordering guarantee passed to the API (e.g. "weak").
        with_sparse_vector: when True, each point also gets a "sparse-text" vector.
        shard_key: optional shard key forwarded in the request body.
        headers: optional extra HTTP headers (default: none).
    """
    # Use a None sentinel instead of a mutable `{}` default argument.
    if headers is None:
        headers = {}

    def get_vector():
        # Unnamed (default) dense vector, plus an optional named sparse vector.
        vector = {
            "": random_dense_vector(),
        }
        if with_sparse_vector:
            vector["sparse-text"] = random_sparse_vector()

        return vector

    while num > 0:
        # Upload at most `batch_size` points per request (all at once when None).
        size = num if batch_size is None else min(num, batch_size)

        r_batch = requests.put(
            f"{peer_url}/collections/{collection_name}/points?wait={wait}&ordering={ordering}",
            json={
                "points": [
                    {
                        "id": i + offset,
                        "vector": get_vector(),
                        "payload": {"city": random.choice(CITIES)},
                    }
                    for i in range(size)
                ],
                "shard_key": shard_key,
            },
            headers=headers,
        )
        if fail_on_error:
            assert_http_ok(r_batch)

        num -= size
        offset += size


def create_collection(
    peer_url,
    collection="test_collection",
    shard_number=1,
    replication_factor=1,
    write_consistency_factor=1,
    timeout=10,
    sharding_method=None,
    headers=None,
):
    """Create a collection on `peer_url` with one dense and one sparse vector space.

    The dense vector uses DENSE_VECTOR_SIZE dimensions with Dot distance; the
    sparse vector is named "sparse-text". Asserts the request returns HTTP 200.

    Args:
        peer_url: base URL of the peer.
        collection: collection name to create.
        shard_number / replication_factor / write_consistency_factor: cluster
            sizing parameters forwarded verbatim to the API.
        timeout: server-side timeout (seconds) passed as a query parameter.
        sharding_method: optional sharding method (None → server default).
        headers: optional extra HTTP headers (default: none).
    """
    # Use a None sentinel instead of a mutable `{}` default argument.
    if headers is None:
        headers = {}

    r_batch = requests.put(
        f"{peer_url}/collections/{collection}?timeout={timeout}",
        json={
            "vectors": {"size": DENSE_VECTOR_SIZE, "distance": "Dot"},
            "sparse_vectors": {"sparse-text": {}},
            "shard_number": shard_number,
            "replication_factor": replication_factor,
            "write_consistency_factor": write_consistency_factor,
            "sharding_method": sharding_method,
        },
        headers=headers,
    )
    assert_http_ok(r_batch)


def drop_collection(peer_url, collection="test_collection", timeout=10, headers=None):
    """Delete `collection` on `peer_url`; asserts the request returns HTTP 200.

    Args:
        peer_url: base URL of the peer.
        collection: collection name to delete.
        timeout: server-side timeout (seconds) passed as a query parameter.
        headers: optional extra HTTP headers (default: none).
    """
    # Use a None sentinel instead of a mutable `{}` default argument.
    if headers is None:
        headers = {}

    r_delete = requests.delete(
        f"{peer_url}/collections/{collection}?timeout={timeout}", headers=headers
    )
    assert_http_ok(r_delete)


def create_field_index(
    peer_url,
    collection="test_collection",
    field_name="city",
    field_schema="keyword",
    headers=None,
):
    """Create a payload field index on `collection`, waiting for completion.

    Asserts the request returns HTTP 200.

    Args:
        peer_url: base URL of the peer.
        collection: target collection.
        field_name: payload field to index.
        field_schema: index schema type (e.g. "keyword").
        headers: optional extra HTTP headers (default: none).
    """
    # Use a None sentinel instead of a mutable `{}` default argument.
    if headers is None:
        headers = {}

    r_batch = requests.put(
        f"{peer_url}/collections/{collection}/index",
        json={
            "field_name": field_name,
            "field_schema": field_schema,
        },
        headers=headers,
        params={"wait": "true"}
    )
    assert_http_ok(r_batch)


def search(peer_url, vector, city, collection="test_collection"):
    """Search `collection` for the top 10 points near `vector` in the given city.

    Filters on payload field "city" matching `city`; returns the API result
    list (payloads included, vectors excluded). Asserts HTTP 200.
    """
    query_body = {
        "vector": vector,
        "top": 10,
        "with_vector": False,
        "with_payload": True,
        "filter": {"must": [{"key": "city", "match": {"value": city}}]},
    }
    response = requests.post(
        f"{peer_url}/collections/{collection}/points/search", json=query_body
    )
    assert_http_ok(response)
    return response.json()["result"]


def count_counts(peer_url, collection="test_collection"):
    """Return the exact point count of `collection` on `peer_url`.

    Issues an exact (non-approximate) count request; asserts HTTP 200.
    """
    body = {"exact": True}
    response = requests.post(
        f"{peer_url}/collections/{collection}/points/count",
        json=body,
    )
    assert_http_ok(response)
    return response.json()["result"]["count"]