colibri.qdrant / lib /segment /src /index /hnsw_index /graph_layers_builder.rs
Gouzi Mohaled
Ajout du dossier lib
84d2a97
use std::cmp::{max, min};
use std::collections::BinaryHeap;
use std::path::Path;
use std::sync::atomic::AtomicUsize;
use bitvec::prelude::BitVec;
use common::fixed_length_priority_queue::FixedLengthPriorityQueue;
use common::types::{PointOffsetType, ScoreType, ScoredPointOffset};
use parking_lot::{Mutex, MutexGuard, RwLock};
use rand::distributions::Uniform;
use rand::Rng;
use super::graph_links::GraphLinks;
use crate::common::operation_error::OperationResult;
use crate::index::hnsw_index::entry_points::EntryPoints;
use crate::index::hnsw_index::graph_layers::{GraphLayers, GraphLayersBase, LinkContainer};
use crate::index::hnsw_index::graph_links::GraphLinksConverter;
use crate::index::hnsw_index::point_scorer::FilteredScorer;
use crate::index::hnsw_index::search_context::SearchContext;
use crate::index::visited_pool::{VisitedListHandle, VisitedPool};
pub type LockedLinkContainer = RwLock<LinkContainer>;
pub type LockedLayersContainer = Vec<LockedLinkContainer>;
/// Same as `GraphLayers`, but allows to build in parallel
/// Convertible to `GraphLayers`
pub struct GraphLayersBuilder {
max_level: AtomicUsize,
m: usize,
m0: usize,
ef_construct: usize,
// Factor of level probability
level_factor: f64,
// Exclude points according to "not closer than base" heuristic?
use_heuristic: bool,
links_layers: Vec<LockedLayersContainer>,
entry_points: Mutex<EntryPoints>,
// Fields used on construction phase only
visited_pool: VisitedPool,
// List of bool flags, which defines if the point is already indexed or not
ready_list: RwLock<BitVec>,
}
impl GraphLayersBase for GraphLayersBuilder {
fn get_visited_list_from_pool(&self) -> VisitedListHandle {
self.visited_pool.get(self.num_points())
}
fn links_map<F>(&self, point_id: PointOffsetType, level: usize, mut f: F)
where
F: FnMut(PointOffsetType),
{
let links = self.links_layers[point_id as usize][level].read();
let ready_list = self.ready_list.read();
for link in links.iter() {
if ready_list[*link as usize] {
f(*link);
}
}
}
fn get_m(&self, level: usize) -> usize {
if level == 0 {
self.m0
} else {
self.m
}
}
}
impl GraphLayersBuilder {
pub fn get_entry_points(&self) -> MutexGuard<EntryPoints> {
self.entry_points.lock()
}
pub fn into_graph_layers<TGraphLinks: GraphLinks>(
self,
path: Option<&Path>,
) -> OperationResult<GraphLayers<TGraphLinks>> {
let unlocker_links_layers = self
.links_layers
.into_iter()
.map(|l| l.into_iter().map(|l| l.into_inner()).collect())
.collect();
let mut links_converter = GraphLinksConverter::new(unlocker_links_layers);
if let Some(path) = path {
links_converter.save_as(path)?;
}
let links = TGraphLinks::from_converter(links_converter)?;
Ok(GraphLayers {
m: self.m,
m0: self.m0,
ef_construct: self.ef_construct,
links,
entry_points: self.entry_points.into_inner(),
visited_pool: self.visited_pool,
})
}
pub fn new_with_params(
num_vectors: usize, // Initial number of points in index
m: usize, // Expected M for non-first layer
m0: usize, // Expected M for first layer
ef_construct: usize,
entry_points_num: usize, // Depends on number of points
use_heuristic: bool,
reserve: bool,
) -> Self {
let links_layers = std::iter::repeat_with(|| {
vec![RwLock::new(if reserve {
Vec::with_capacity(m0)
} else {
vec![]
})]
})
.take(num_vectors)
.collect();
let ready_list = RwLock::new(BitVec::repeat(false, num_vectors));
Self {
max_level: AtomicUsize::new(0),
m,
m0,
ef_construct,
level_factor: 1.0 / (max(m, 2) as f64).ln(),
use_heuristic,
links_layers,
entry_points: Mutex::new(EntryPoints::new(entry_points_num)),
visited_pool: VisitedPool::new(),
ready_list,
}
}
pub fn new(
num_vectors: usize, // Initial number of points in index
m: usize, // Expected M for non-first layer
m0: usize, // Expected M for first layer
ef_construct: usize,
entry_points_num: usize, // Depends on number of points
use_heuristic: bool,
) -> Self {
Self::new_with_params(
num_vectors,
m,
m0,
ef_construct,
entry_points_num,
use_heuristic,
true,
)
}
pub fn merge_from_other(&mut self, other: GraphLayersBuilder) {
self.max_level = AtomicUsize::new(max(
self.max_level.load(std::sync::atomic::Ordering::Relaxed),
other.max_level.load(std::sync::atomic::Ordering::Relaxed),
));
let mut visited_list = self.visited_pool.get(self.num_points());
if other.links_layers.len() > self.links_layers.len() {
self.links_layers
.resize_with(other.links_layers.len(), Vec::new);
}
for (point_id, layers) in other.links_layers.into_iter().enumerate() {
let current_layers = &mut self.links_layers[point_id];
for (level, other_links) in layers.into_iter().enumerate() {
if current_layers.len() <= level {
current_layers.push(other_links);
} else {
let other_links = other_links.into_inner();
visited_list.next_iteration();
let mut current_links = current_layers[level].write();
current_links.iter().copied().for_each(|x| {
visited_list.check_and_update_visited(x);
});
for other_link in other_links
.into_iter()
.filter(|x| !visited_list.check_and_update_visited(*x))
{
current_links.push(other_link);
}
}
}
}
self.entry_points
.lock()
.merge_from_other(other.entry_points.into_inner());
}
fn num_points(&self) -> usize {
self.links_layers.len()
}
/// Generate random level for a new point, according to geometric distribution
pub fn get_random_layer<R>(&self, rng: &mut R) -> usize
where
R: Rng + ?Sized,
{
let distribution = Uniform::new(0.0, 1.0);
let sample: f64 = rng.sample(distribution);
let picked_level = -sample.ln() * self.level_factor;
picked_level.round() as usize
}
fn get_point_level(&self, point_id: PointOffsetType) -> usize {
self.links_layers[point_id as usize].len() - 1
}
pub fn set_levels(&mut self, point_id: PointOffsetType, level: usize) {
if self.links_layers.len() <= point_id as usize {
while self.links_layers.len() <= point_id as usize {
self.links_layers.push(vec![]);
}
}
let point_layers = &mut self.links_layers[point_id as usize];
while point_layers.len() <= level {
let links = Vec::with_capacity(self.m);
point_layers.push(RwLock::new(links));
}
self.max_level
.fetch_max(level, std::sync::atomic::Ordering::Relaxed);
}
/// Connect new point to links, so that links contains only closest points
fn connect_new_point<F>(
links: &mut LinkContainer,
new_point_id: PointOffsetType,
target_point_id: PointOffsetType,
level_m: usize,
mut score_internal: F,
) where
F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
{
// ToDo: binary search here ? (most likely does not worth it)
let new_to_target = score_internal(target_point_id, new_point_id);
let mut id_to_insert = links.len();
for (i, &item) in links.iter().enumerate() {
let target_to_link = score_internal(target_point_id, item);
if target_to_link < new_to_target {
id_to_insert = i;
break;
}
}
if links.len() < level_m {
links.insert(id_to_insert, new_point_id);
} else if id_to_insert != links.len() {
links.pop();
links.insert(id_to_insert, new_point_id);
}
}
/// <https://github.com/nmslib/hnswlib/issues/99>
fn select_candidate_with_heuristic_from_sorted<F>(
candidates: impl Iterator<Item = ScoredPointOffset>,
m: usize,
mut score_internal: F,
) -> Vec<PointOffsetType>
where
F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
{
let mut result_list = Vec::with_capacity(m);
for current_closest in candidates {
if result_list.len() >= m {
break;
}
let mut is_good = true;
for &selected_point in &result_list {
let dist_to_already_selected = score_internal(current_closest.idx, selected_point);
if dist_to_already_selected > current_closest.score {
is_good = false;
break;
}
}
if is_good {
result_list.push(current_closest.idx);
}
}
result_list
}
/// <https://github.com/nmslib/hnswlib/issues/99>
fn select_candidates_with_heuristic<F>(
candidates: FixedLengthPriorityQueue<ScoredPointOffset>,
m: usize,
score_internal: F,
) -> Vec<PointOffsetType>
where
F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
{
let closest_iter = candidates.into_iter();
Self::select_candidate_with_heuristic_from_sorted(closest_iter, m, score_internal)
}
pub fn link_new_point(&self, point_id: PointOffsetType, mut points_scorer: FilteredScorer) {
// Check if there is an suitable entry point
// - entry point level if higher or equal
// - it satisfies filters
let level = self.get_point_level(point_id);
let entry_point_opt = self
.entry_points
.lock()
.get_entry_point(|point_id| points_scorer.check_vector(point_id));
match entry_point_opt {
// New point is a new empty entry (for this filter, at least)
// We can't do much here, so just quit
None => {}
// Entry point found.
Some(entry_point) => {
let mut level_entry = if entry_point.level > level {
// The entry point is higher than a new point
// Let's find closest one on same level
// greedy search for a single closest point
self.search_entry(
entry_point.point_id,
entry_point.level,
level,
&mut points_scorer,
)
} else {
ScoredPointOffset {
idx: entry_point.point_id,
score: points_scorer.score_internal(point_id, entry_point.point_id),
}
};
// minimal common level for entry points
let linking_level = min(level, entry_point.level);
for curr_level in (0..=linking_level).rev() {
let level_m = self.get_m(curr_level);
let mut visited_list = self.get_visited_list_from_pool();
visited_list.check_and_update_visited(level_entry.idx);
let mut search_context = SearchContext::new(level_entry, self.ef_construct);
self._search_on_level(
&mut search_context,
curr_level,
&mut visited_list,
&mut points_scorer,
);
if let Some(the_nearest) = search_context.nearest.iter().max() {
level_entry = *the_nearest;
}
let scorer = |a, b| points_scorer.score_internal(a, b);
if self.use_heuristic {
let selected_nearest = {
let mut existing_links =
self.links_layers[point_id as usize][curr_level].write();
{
let ready_list = self.ready_list.read();
for &existing_link in existing_links.iter() {
if !visited_list.check(existing_link)
&& ready_list[existing_link as usize]
{
search_context.process_candidate(ScoredPointOffset {
idx: existing_link,
score: points_scorer.score_point(existing_link),
});
}
}
}
let selected_nearest = Self::select_candidates_with_heuristic(
search_context.nearest,
level_m,
scorer,
);
existing_links.clone_from(&selected_nearest);
selected_nearest
};
for &other_point in &selected_nearest {
let mut other_point_links =
self.links_layers[other_point as usize][curr_level].write();
if other_point_links.len() < level_m {
// If linked point is lack of neighbours
other_point_links.push(point_id);
} else {
let mut candidates = BinaryHeap::with_capacity(level_m + 1);
candidates.push(ScoredPointOffset {
idx: point_id,
score: scorer(point_id, other_point),
});
for other_point_link in
other_point_links.iter().take(level_m).copied()
{
candidates.push(ScoredPointOffset {
idx: other_point_link,
score: scorer(other_point_link, other_point),
});
}
let selected_candidates =
Self::select_candidate_with_heuristic_from_sorted(
candidates.into_sorted_vec().into_iter().rev(),
level_m,
scorer,
);
other_point_links.clear(); // this do not free memory, which is good
for selected in selected_candidates.iter().copied() {
other_point_links.push(selected);
}
}
}
} else {
for nearest_point in &search_context.nearest {
{
let mut links =
self.links_layers[point_id as usize][curr_level].write();
Self::connect_new_point(
&mut links,
nearest_point.idx,
point_id,
level_m,
scorer,
);
}
{
let mut links = self.links_layers[nearest_point.idx as usize]
[curr_level]
.write();
Self::connect_new_point(
&mut links,
point_id,
nearest_point.idx,
level_m,
scorer,
);
}
}
}
}
}
}
self.ready_list.write().set(point_id as usize, true);
self.entry_points
.lock()
.new_point(point_id, level, |point_id| {
points_scorer.check_vector(point_id)
});
}
/// This function returns average number of links per node in HNSW graph
/// on specified level.
///
/// Useful for:
/// - estimating memory consumption
/// - percolation threshold estimation
/// - debugging
pub fn get_average_connectivity_on_level(&self, level: usize) -> f32 {
let mut sum = 0;
let mut count = 0;
for links in self.links_layers.iter() {
if links.len() > level {
sum += links[level].read().len();
count += 1;
}
}
if count == 0 {
0.0
} else {
sum as f32 / count as f32
}
}
}
#[cfg(test)]
mod tests {
use itertools::Itertools;
use rand::prelude::StdRng;
use rand::seq::SliceRandom;
use rand::SeedableRng;
use super::*;
use crate::data_types::vectors::{DenseVector, VectorElementType};
use crate::fixtures::index_fixtures::{
random_vector, FakeFilterContext, TestRawScorerProducer,
};
use crate::index::hnsw_index::graph_links::GraphLinksRam;
use crate::index::hnsw_index::tests::create_graph_layer_fixture;
use crate::spaces::metric::Metric;
use crate::spaces::simple::{CosineMetric, EuclidMetric};
use crate::vector_storage::chunked_vector_storage::VectorOffsetType;
const M: usize = 8;
#[cfg(not(windows))]
fn parallel_graph_build<TMetric: Metric<VectorElementType> + Sync + Send, R>(
num_vectors: usize,
dim: usize,
use_heuristic: bool,
rng: &mut R,
) -> (TestRawScorerProducer<TMetric>, GraphLayersBuilder)
where
R: Rng + ?Sized,
{
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
let pool = rayon::ThreadPoolBuilder::new()
.num_threads(2)
.build()
.unwrap();
let m = M;
let ef_construct = 16;
let entry_points_num = 10;
let vector_holder = TestRawScorerProducer::<TMetric>::new(dim, num_vectors, rng);
let mut graph_layers = GraphLayersBuilder::new(
num_vectors,
m,
m * 2,
ef_construct,
entry_points_num,
use_heuristic,
);
for idx in 0..(num_vectors as PointOffsetType) {
let level = graph_layers.get_random_layer(rng);
graph_layers.set_levels(idx, level);
}
pool.install(|| {
(0..(num_vectors as PointOffsetType))
.into_par_iter()
.for_each(|idx| {
let fake_filter_context = FakeFilterContext {};
let added_vector = vector_holder.vectors.get(idx as VectorOffsetType).to_vec();
let raw_scorer = vector_holder.get_raw_scorer(added_vector).unwrap();
let scorer =
FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context));
graph_layers.link_new_point(idx, scorer);
raw_scorer.take_hardware_counter().discard_results();
});
});
(vector_holder, graph_layers)
}
fn create_graph_layer<TMetric: Metric<VectorElementType>, R>(
num_vectors: usize,
dim: usize,
use_heuristic: bool,
rng: &mut R,
) -> (TestRawScorerProducer<TMetric>, GraphLayersBuilder)
where
R: Rng + ?Sized,
{
let m = M;
let ef_construct = 16;
let entry_points_num = 10;
let vector_holder = TestRawScorerProducer::<TMetric>::new(dim, num_vectors, rng);
let mut graph_layers = GraphLayersBuilder::new(
num_vectors,
m,
m * 2,
ef_construct,
entry_points_num,
use_heuristic,
);
for idx in 0..(num_vectors as PointOffsetType) {
let level = graph_layers.get_random_layer(rng);
graph_layers.set_levels(idx, level);
}
for idx in 0..(num_vectors as PointOffsetType) {
let fake_filter_context = FakeFilterContext {};
let added_vector = vector_holder.vectors.get(idx as VectorOffsetType).to_vec();
let raw_scorer = vector_holder.get_raw_scorer(added_vector.clone()).unwrap();
let scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context));
graph_layers.link_new_point(idx, scorer);
raw_scorer.take_hardware_counter().discard_results();
}
(vector_holder, graph_layers)
}
#[cfg(not(windows))] // https://github.com/qdrant/qdrant/issues/1452
#[test]
fn test_parallel_graph_build() {
let num_vectors = 1000;
let dim = 8;
let mut rng = StdRng::seed_from_u64(42);
type M = CosineMetric;
// let (vector_holder, graph_layers_builder) =
// create_graph_layer::<M, _>(num_vectors, dim, false, &mut rng);
let (vector_holder, graph_layers_builder) =
parallel_graph_build::<M, _>(num_vectors, dim, false, &mut rng);
let main_entry = graph_layers_builder
.entry_points
.lock()
.get_entry_point(|_x| true)
.expect("Expect entry point to exists");
assert!(main_entry.level > 0);
let num_levels = graph_layers_builder
.links_layers
.iter()
.map(|x| x.len())
.max()
.unwrap();
assert_eq!(main_entry.level + 1, num_levels);
let total_links_0: usize = graph_layers_builder
.links_layers
.iter()
.map(|x| x[0].read().len())
.sum();
assert!(total_links_0 > 0);
eprintln!("total_links_0 = {total_links_0:#?}");
eprintln!("num_vectors = {num_vectors:#?}");
assert!(total_links_0 as f64 / num_vectors as f64 > M as f64);
let top = 5;
let query = random_vector(&mut rng, dim);
let processed_query = <M as Metric<VectorElementType>>::preprocess(query.clone());
let mut reference_top = FixedLengthPriorityQueue::new(top);
for idx in 0..vector_holder.vectors.len() as PointOffsetType {
let vec = &vector_holder.vectors.get(idx as VectorOffsetType);
reference_top.push(ScoredPointOffset {
idx,
score: M::similarity(vec, &processed_query),
});
}
let graph = graph_layers_builder
.into_graph_layers::<GraphLinksRam>(None)
.unwrap();
let fake_filter_context = FakeFilterContext {};
let raw_scorer = vector_holder.get_raw_scorer(query).unwrap();
let scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context));
let ef = 16;
let graph_search = graph.search(top, ef, scorer, None);
raw_scorer.take_hardware_counter().discard_results();
assert_eq!(reference_top.into_vec(), graph_search);
}
#[test]
fn test_add_points() {
let num_vectors = 1000;
let dim = 8;
let mut rng = StdRng::seed_from_u64(42);
let mut rng2 = StdRng::seed_from_u64(42);
type M = CosineMetric;
let (vector_holder, graph_layers_builder) =
create_graph_layer::<M, _>(num_vectors, dim, false, &mut rng);
let (_vector_holder_orig, graph_layers_orig) =
create_graph_layer_fixture::<M, _>(num_vectors, M, dim, false, &mut rng2, None);
// check is graph_layers_builder links are equal to graph_layers_orig
let orig_len = graph_layers_orig.links.num_points();
let builder_len = graph_layers_builder.links_layers.len();
assert_eq!(orig_len, builder_len);
for idx in 0..builder_len {
let links_orig = &graph_layers_orig
.links
.links(idx as PointOffsetType, 0)
.collect_vec();
let links_builder = graph_layers_builder.links_layers[idx][0].read();
let link_container_from_builder = links_builder.iter().copied().collect::<Vec<_>>();
assert_eq!(links_orig, &link_container_from_builder);
}
let main_entry = graph_layers_builder
.entry_points
.lock()
.get_entry_point(|_x| true)
.expect("Expect entry point to exists");
assert!(main_entry.level > 0);
let num_levels = graph_layers_builder
.links_layers
.iter()
.map(|x| x.len())
.max()
.unwrap();
assert_eq!(main_entry.level + 1, num_levels);
let total_links_0: usize = graph_layers_builder
.links_layers
.iter()
.map(|x| x[0].read().len())
.sum();
assert!(total_links_0 > 0);
eprintln!("total_links_0 = {total_links_0:#?}");
eprintln!("num_vectors = {num_vectors:#?}");
assert!(total_links_0 as f64 / num_vectors as f64 > M as f64);
let top = 5;
let query = random_vector(&mut rng, dim);
let processed_query = <M as Metric<VectorElementType>>::preprocess(query.clone());
let mut reference_top = FixedLengthPriorityQueue::new(top);
for idx in 0..vector_holder.vectors.len() as PointOffsetType {
let vec = &vector_holder.vectors.get(idx as VectorOffsetType);
reference_top.push(ScoredPointOffset {
idx,
score: M::similarity(vec, &processed_query),
});
}
let graph = graph_layers_builder
.into_graph_layers::<GraphLinksRam>(None)
.unwrap();
let fake_filter_context = FakeFilterContext {};
let raw_scorer = vector_holder.get_raw_scorer(query).unwrap();
let scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context));
let ef = 16;
let graph_search = graph.search(top, ef, scorer, None);
raw_scorer.take_hardware_counter().discard_results();
assert_eq!(reference_top.into_vec(), graph_search);
}
#[test]
#[ignore]
fn test_hnsw_graph_properties() {
const NUM_VECTORS: usize = 5_000;
const DIM: usize = 16;
const M: usize = 16;
const EF_CONSTRUCT: usize = 64;
const USE_HEURISTIC: bool = true;
let mut rng = StdRng::seed_from_u64(42);
let vector_holder = TestRawScorerProducer::<CosineMetric>::new(DIM, NUM_VECTORS, &mut rng);
let mut graph_layers_builder =
GraphLayersBuilder::new(NUM_VECTORS, M, M * 2, EF_CONSTRUCT, 10, USE_HEURISTIC);
let fake_filter_context = FakeFilterContext {};
for idx in 0..(NUM_VECTORS as PointOffsetType) {
let added_vector = vector_holder.vectors.get(idx as VectorOffsetType).to_vec();
let raw_scorer = vector_holder.get_raw_scorer(added_vector).unwrap();
let scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context));
let level = graph_layers_builder.get_random_layer(&mut rng);
graph_layers_builder.set_levels(idx, level);
graph_layers_builder.link_new_point(idx, scorer);
raw_scorer.take_hardware_counter().discard_results();
}
let graph_layers = graph_layers_builder
.into_graph_layers::<GraphLinksRam>(None)
.unwrap();
let num_points = graph_layers.links.num_points();
eprintln!("number_points = {num_points:#?}");
let max_layer = (0..NUM_VECTORS)
.map(|i| graph_layers.links.point_level(i as PointOffsetType))
.max()
.unwrap();
eprintln!("max_layer = {:#?}", max_layer + 1);
let layers910 = graph_layers.links.point_level(910);
let links910 = (0..layers910 + 1)
.map(|i| graph_layers.links.links(910, i).collect_vec())
.collect::<Vec<_>>();
eprintln!("graph_layers.links_layers[910] = {links910:#?}",);
let total_edges: usize = (0..NUM_VECTORS)
.map(|i| graph_layers.links.links(i as PointOffsetType, 0).count())
.sum();
let avg_connectivity = total_edges as f64 / NUM_VECTORS as f64;
eprintln!("avg_connectivity = {avg_connectivity:#?}");
}
#[test]
#[ignore]
fn test_candidate_selection_heuristics() {
const NUM_VECTORS: usize = 100;
const DIM: usize = 16;
const M: usize = 16;
let mut rng = StdRng::seed_from_u64(42);
let vector_holder = TestRawScorerProducer::<EuclidMetric>::new(DIM, NUM_VECTORS, &mut rng);
let mut candidates: FixedLengthPriorityQueue<ScoredPointOffset> =
FixedLengthPriorityQueue::new(NUM_VECTORS);
let new_vector_to_insert = random_vector(&mut rng, DIM);
let scorer = vector_holder.get_raw_scorer(new_vector_to_insert).unwrap();
for i in 0..NUM_VECTORS {
candidates.push(ScoredPointOffset {
idx: i as PointOffsetType,
score: scorer.score_point(i as PointOffsetType),
});
}
let sorted_candidates = candidates.into_vec();
for x in sorted_candidates.iter().take(M) {
eprintln!("sorted_candidates = ({}, {})", x.idx, x.score);
}
let selected_candidates = GraphLayersBuilder::select_candidate_with_heuristic_from_sorted(
sorted_candidates.into_iter(),
M,
|a, b| scorer.score_internal(a, b),
);
for x in selected_candidates.iter() {
eprintln!("selected_candidates = {x}");
}
scorer.take_hardware_counter().discard_results();
}
#[test]
fn test_connect_new_point() {
let num_points = 10;
let m = 6;
let ef_construct = 32;
// See illustration in docs
let points: Vec<DenseVector> = vec![
vec![21.79, 7.18], // Target
vec![20.58, 5.46], // 1 B - yes
vec![21.19, 4.51], // 2 C
vec![24.73, 8.24], // 3 D - yes
vec![24.55, 9.98], // 4 E
vec![26.11, 6.85], // 5 F
vec![17.64, 11.14], // 6 G - yes
vec![14.97, 11.52], // 7 I
vec![14.97, 9.60], // 8 J
vec![16.23, 14.32], // 9 H
vec![12.69, 19.13], // 10 K
];
let scorer = |a: PointOffsetType, b: PointOffsetType| {
-((points[a as usize][0] - points[b as usize][0]).powi(2)
+ (points[a as usize][1] - points[b as usize][1]).powi(2))
.sqrt()
};
let mut insert_ids = (1..points.len() as PointOffsetType).collect_vec();
let mut candidates = FixedLengthPriorityQueue::new(insert_ids.len());
for &id in &insert_ids {
candidates.push(ScoredPointOffset {
idx: id,
score: scorer(0, id),
});
}
let res = GraphLayersBuilder::select_candidates_with_heuristic(candidates, m, scorer);
assert_eq!(&res, &vec![1, 3, 6]);
let mut rng = StdRng::seed_from_u64(42);
let graph_layers_builder = GraphLayersBuilder::new(num_points, m, m, ef_construct, 1, true);
insert_ids.shuffle(&mut rng);
for &id in &insert_ids {
let level_m = graph_layers_builder.get_m(0);
let mut links = graph_layers_builder.links_layers[0][0].write();
GraphLayersBuilder::connect_new_point(&mut links, id, 0, level_m, scorer)
}
let mut result = Vec::new();
graph_layers_builder.links_layers[0][0]
.read()
.iter()
.for_each(|x| result.push(*x));
assert_eq!(&result, &vec![1, 2, 3, 4, 5, 6]);
}
}