import math
import random

import pytest

from .helpers.collection_setup import basic_collection_setup, drop_collection
from .helpers.helpers import request_with_validation


@pytest.fixture(scope='module', autouse=True)
def collection_name_lookup(collection_name):
    """Return the name of the secondary collection used by the lookup test."""
    return f"{collection_name}_lookup"


def random_vector(dim=4):
    """Return a list of ``dim`` random floats drawn from ``random.random()``."""
    return [random.random() for _ in range(dim)]


def random_example(dim=4, min_id=1, max_id=8):
    """Return either a random vector or a random integer id, with equal odds.

    The id is drawn with ``random.randint(min_id, max_id)`` (both inclusive).
    """
    if random.random() < 0.5:
        return random_vector(dim)
    else:
        return random.randint(min_id, max_id)


def count_ids_in_examples(context, target) -> int:
    """Count the distinct integer examples among the context pairs and target.

    ``context`` is a list of ``{"positive": ..., "negative": ...}`` dicts;
    vector (list) examples are ignored. ``target`` may be ``None``, a vector
    or an integer id. The tests below subtract this count from the requested
    limit to get the expected number of results.
    NOTE(review): presumably points used as examples are excluded from the
    results and the collection holds 8 points -- confirm against the helpers.
    """
    ids = {
        example
        for pair in context
        for example in (pair["positive"], pair["negative"])
        if isinstance(example, int)
    }
    if isinstance(target, int):
        ids.add(target)
    return len(ids)


@pytest.fixture(autouse=True, scope="module")
def setup(on_disk_vectors, collection_name):
    """Set up the module's collection, run the tests, then drop it."""
    basic_collection_setup(collection_name=collection_name, on_disk_vectors=on_disk_vectors)
    yield
    drop_collection(collection_name=collection_name)


# Context is when we don't include a target vector
def test_context(collection_name):
    """Discover with context pairs only; every returned score must be <= 0."""
    context = [
        {"positive": random_example(), "negative": random_example()},
        {"positive": random_example(), "negative": random_example()},
    ]

    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "context": context,
            "limit": 8,
        },
    )
    assert response.ok, response.json()

    scored_points = response.json()["result"]

    assert len(scored_points) == 8 - count_ids_in_examples(context, None)

    # Score in context search relates to loss, so max score for context search is 0.0
    for point in scored_points:
        assert point["score"] <= 0.0


# When we only use target, it should be the exact same as search
def test_only_target_is_search_with_different_scoring(collection_name):
    """Target-only discover must return search's ordering with other scores."""
    target = random_vector()

    # First, search
    response = request_with_validation(
        api="/collections/{collection_name}/points/search",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "vector": target,
            "limit": 8,
        },
    )
    assert response.ok, response.json()

    search_points = response.json()["result"]

    # Then, discover
    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "target": target,
            "limit": 8,
        },
    )
    assert response.ok, response.json()

    discover_points = response.json()["result"]

    assert len(discover_points) == 8

    # Results should be in same order, but different scores
    assert len(search_points) == len(discover_points)
    for search_point, discover_point in zip(search_points, discover_points):
        assert search_point["id"] == discover_point["id"]
        assert search_point["score"] != discover_point["score"]


# Only when we use both target and context, we are doing discovery
def test_discover_same_context(collection_name):
    """Two discoveries sharing a context must agree on the integer score part."""
    target1 = random_example()

    context = [
        {"positive": random_example(), "negative": random_example()},
        {"positive": random_example(), "negative": random_example()},
    ]

    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "target": target1,
            "context": context,
            "limit": 8,
            "params": {
                "exact": True,
            },
        },
    )
    assert response.ok, response.json()

    scored_points1 = response.json()["result"]

    assert len(scored_points1) == 8 - count_ids_in_examples(context, target1)

    target2 = random_example()

    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "target": target2,
            "context": context,
            "limit": 8,
            "params": {
                "exact": True,
            },
        },
    )
    assert response.ok, response.json()

    scored_points2 = response.json()["result"]

    assert len(scored_points2) == 8 - count_ids_in_examples(context, target2)

    # We keep same context, so context part of the score (integer part) should be the same,
    # while target part of the score (decimal part) should be different
    scored_points_2_map = {point["id"]: point for point in scored_points2}
    # Walk every point of the first result (not a zip of both lists, which
    # would stop at the shorter one) and match by id, since the two result
    # sets may differ in length and order.
    for point1 in scored_points1:
        if point1["id"] in scored_points_2_map:
            point2 = scored_points_2_map[point1["id"]]
            assert math.floor(point1["score"]) == math.floor(point2["score"])

            target_score1 = point1["score"] - math.floor(point1["score"])
            target_score2 = point2["score"] - math.floor(point2["score"])
            if target1 == target2:
                assert math.isclose(target_score1, target_score2, rel_tol=1e-5)
            else:
                assert not math.isclose(target_score1, target_score2, rel_tol=1e-5)


def test_discover_same_target(collection_name):
    """Two discoveries sharing a target must agree on the decimal score part."""
    target = random_example()

    context1 = [
        {"positive": random_example(), "negative": random_example()},
        {"positive": random_example(), "negative": random_example()},
    ]

    context2 = [
        {"positive": random_example(), "negative": random_example()},
        {"positive": random_example(), "negative": random_example()},
    ]

    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "target": target,
            "context": context1,
            "limit": 8,
        },
    )
    assert response.ok, response.json()

    scored_points1 = response.json()["result"]

    assert len(scored_points1) == 8 - count_ids_in_examples(context1, target)

    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "target": target,
            "context": context2,
            "limit": 8,
        },
    )
    assert response.ok, response.json()

    scored_points2 = response.json()["result"]

    assert len(scored_points2) == 8 - count_ids_in_examples(context2, target)

    # We keep same target, so context part of the score (integer part) can be different,
    # while target part of the score (decimal part) should be the same
    scored_points2_map = {point["id"]: point for point in scored_points2}
    for point1 in scored_points1:
        if point1["id"] in scored_points2_map:
            point2 = scored_points2_map[point1["id"]]
            target_score1 = point1["score"] - math.floor(point1["score"])
            target_score2 = point2["score"] - math.floor(point2["score"])
            assert math.isclose(target_score1, target_score2, rel_tol=1e-5)


def test_discover_batch(collection_name):
    """A batch discover must return exactly what the single requests return."""
    targets = []
    contexts = []
    single_results = []

    # Singles
    for i in range(10):
        target = random_example()
        targets.append(target)
        context = [
            {"positive": random_example(), "negative": random_example()},
            {"positive": random_example(), "negative": random_example()},
        ]
        contexts.append(context)
        response = request_with_validation(
            api="/collections/{collection_name}/points/discover",
            method="POST",
            path_params={"collection_name": collection_name},
            body={
                "target": target,
                "context": context,
                "limit": 8,
            },
        )
        assert response.ok, response.json()

        single_results.append(response.json()["result"])

    # Batch
    searches = [
        {
            "target": target,
            "context": context,
            "limit": 8,
        }
        for target, context in zip(targets, contexts)
    ]

    response = request_with_validation(
        api="/collections/{collection_name}/points/discover/batch",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "searches": searches,
        },
    )
    # Fail with the server's error body rather than a KeyError on "result".
    assert response.ok, response.json()

    batch_results = response.json()["result"]

    assert len(single_results) == len(batch_results)
    for single_result, batch_result in zip(single_results, batch_results):
        assert single_result == batch_result


def test_null_offset(collection_name):
    """An explicit ``"offset": None`` in the request body must be accepted."""
    target = random_example()

    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "target": target,
            "limit": 8,
            "offset": None,
        },
    )
    assert response.ok, response.json()


def test_discovery_lookup(collection_name, collection_name_lookup):
    """Context ids resolved via ``lookup_from`` must match the raw vectors."""
    # delete lookup collection if exists
    response = request_with_validation(
        api='/collections/{collection_name}',
        method="DELETE",
        path_params={'collection_name': collection_name_lookup},
    )
    assert response.ok, response.text

    # re-create lookup collection
    response = request_with_validation(
        api='/collections/{collection_name}',
        method="PUT",
        path_params={'collection_name': collection_name_lookup},
        body={
            "vectors": {
                "other": {
                    "size": 4,
                    "distance": "Dot",
                }
            }
        }
    )
    assert response.ok, response.text

    # insert vectors to lookup collection
    response = request_with_validation(
        api='/collections/{collection_name}/points',
        method="PUT",
        path_params={'collection_name': collection_name_lookup},
        query_params={'wait': 'true'},
        body={
            "points": [
                {
                    "id": 1,
                    "vector": {"other": [1.0, 0.0, 0.0, 0.0]},
                },
                {
                    "id": 2,
                    "vector": {"other": [0.0, 0.0, 0.0, 2.0]},
                },
            ]
        }
    )
    assert response.ok, response.text

    # check discover by id + lookup_from
    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "target": [0.2, 0.1, 0.9, 0.7],
            "context": [
                {
                    "positive": 1,
                    "negative": 2
                },
            ],
            "limit": 10,
            "lookup_from": {
                "collection": collection_name_lookup,
                "vector": "other"
            }
        },
    )
    assert response.ok, response.text
    discovery_result_by_id = response.json()["result"]

    # check discover by vector + lookup_from
    response = request_with_validation(
        api="/collections/{collection_name}/points/discover",
        method="POST",
        path_params={"collection_name": collection_name},
        body={
            "target": [0.2, 0.1, 0.9, 0.7],
            "context": [
                {
                    "positive": [1.0, 0.0, 0.0, 0.0],
                    "negative": [0.0, 0.0, 0.0, 2.0]
                },
            ],
            "limit": 10,
        },
    )
    assert response.ok, response.text
    discovery_result_by_vector = response.json()["result"]

    # check if results are the same
    assert discovery_result_by_id == discovery_result_by_vector, f"discovery_result_by_id: {discovery_result_by_id}, discovery_result_by_vector: {discovery_result_by_vector}"