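"""Shared fixtures for the consensus test suite: helpers to create and drop a
collection, upsert randomly generated dense + sparse points, index a payload
field, and run filtered searches and exact point counts against a peer's
Qdrant-style REST API.
"""
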
import random
from typing import List

import requests

from consensus_tests.assertions import assert_http_ok

CITIES = ["London", "New York", "Paris", "Tokyo", "Berlin", "Rome", "Madrid", "Moscow"]

# dense vector sizing
DENSE_VECTOR_SIZE = 4

# sparse vector sizing
SPARSE_VECTOR_SIZE = 1000
SPARSE_VECTOR_DENSITY = 0.1


# Generate a random dense vector
def random_dense_vector():
    return [random.random() for _ in range(DENSE_VECTOR_SIZE)]


# Generate a random sparse vector in the sparse wire format: parallel lists
# of non-zero indices and their values
def random_sparse_vector():
    num_non_zero = int(SPARSE_VECTOR_SIZE * SPARSE_VECTOR_DENSITY)
    indices: List[int] = random.sample(range(SPARSE_VECTOR_SIZE), num_non_zero)
    values: List[float] = [round(random.random(), 6) for _ in range(num_non_zero)]
    return {"indices": indices, "values": values}


def upsert_random_points(
    peer_url,
    num,
    collection_name="test_collection",
    fail_on_error=True,
    offset=0,
    batch_size=None,
    wait="true",
    ordering="weak",
    with_sparse_vector=True,
    shard_key=None,
    headers={},
):
    def get_vector():
        # Build the vectors for a single point: the default (unnamed) dense
        # vector, plus the named sparse vector unless disabled
        vector = {
            "": random_dense_vector(),
        }

        if with_sparse_vector:
            vector["sparse-text"] = random_sparse_vector()

        return vector

    # Upsert `num` points in batches, advancing `offset` so ids stay unique
    while num > 0:
        size = num if batch_size is None else min(num, batch_size)

        r_batch = requests.put(
            f"{peer_url}/collections/{collection_name}/points?wait={wait}&ordering={ordering}",
            json={
                "points": [
                    {
                        "id": i + offset,
                        "vector": get_vector(),
                        "payload": {"city": random.choice(CITIES)},
                    }
                    for i in range(size)
                ],
                "shard_key": shard_key,
            },
            headers=headers,
        )
        if fail_on_error:
            assert_http_ok(r_batch)

        num -= size
        offset += size


def create_collection(
    peer_url,
    collection="test_collection",
    shard_number=1,
    replication_factor=1,
    write_consistency_factor=1,
    timeout=10,
    sharding_method=None,
    headers={},
):
    # Create collection in peer_url
    r_batch = requests.put(
        f"{peer_url}/collections/{collection}?timeout={timeout}",
        json={
            "vectors": {"size": DENSE_VECTOR_SIZE, "distance": "Dot"},
            "sparse_vectors": {"sparse-text": {}},
            "shard_number": shard_number,
            "replication_factor": replication_factor,
            "write_consistency_factor": write_consistency_factor,
            "sharding_method": sharding_method,
        },
        headers=headers,
    )
    assert_http_ok(r_batch)


def drop_collection(peer_url, collection="test_collection", timeout=10, headers={}):
    # Delete collection in peer_url
    r_delete = requests.delete(
        f"{peer_url}/collections/{collection}?timeout={timeout}", headers=headers
    )
    assert_http_ok(r_delete)


def create_field_index(
    peer_url,
    collection="test_collection",
    field_name="city",
    field_schema="keyword",
    headers={},
):
    # Create field index in peer_url
    r_batch = requests.put(
        f"{peer_url}/collections/{collection}/index",
        json={
            "field_name": field_name,
            "field_schema": field_schema,
        },
        headers=headers,
        params={"wait": "true"},
    )
    assert_http_ok(r_batch)


def search(peer_url, vector, city, collection="test_collection"):
    # Filtered search against the default (unnamed) dense vector
    q = {
        "vector": vector,
        "top": 10,
        "with_vector": False,
        "with_payload": True,
        "filter": {"must": [{"key": "city", "match": {"value": city}}]},
    }
    r_search = requests.post(f"{peer_url}/collections/{collection}/points/search", json=q)
    assert_http_ok(r_search)
    return r_search.json()["result"]


def count_counts(peer_url, collection="test_collection"):
    # Exact (non-estimated) count of points in the collection
    r_search = requests.post(
        f"{peer_url}/collections/{collection}/points/count",
        json={
            "exact": True,
        },
    )
    assert_http_ok(r_search)
    return r_search.json()["result"]["count"]
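

# A minimal usage sketch of the helpers above, not part of the test suite.
# Assumptions: a peer is already running and reachable at the hypothetical
# URL below (adjust for your environment), and the default collection name
# "test_collection" is free to create and drop.
if __name__ == "__main__":
    peer_url = "http://localhost:6333"  # hypothetical local peer

    create_collection(peer_url)
    create_field_index(peer_url)
    upsert_random_points(peer_url, num=100, batch_size=25)

    print("total points:", count_counts(peer_url))
    # A bare list targets the default (unnamed) dense vector
    print("London hits:", search(peer_url, random_dense_vector(), "London"))

    drop_collection(peer_url)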