Spaces:
Build error
Build error
File size: 4,155 Bytes
84d2a97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
use std::collections::HashMap;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use common::cpu::CpuPermit;
use criterion::{criterion_group, criterion_main, Criterion};
use rand::prelude::StdRng;
use rand::SeedableRng;
use segment::data_types::vectors::{only_default_multi_vector, DEFAULT_VECTOR_NAME};
use segment::entry::entry_point::SegmentEntry;
use segment::fixtures::payload_fixtures::random_multi_vector;
use segment::index::hnsw_index::graph_links::GraphLinksRam;
use segment::index::hnsw_index::hnsw::{HNSWIndex, HnswIndexOpenArgs};
use segment::index::hnsw_index::num_rayon_threads;
use segment::index::VectorIndex;
use segment::segment_constructor::build_segment;
use segment::types::Distance::Dot;
use segment::types::{
HnswConfig, Indexes, MultiVectorConfig, SegmentConfig, SeqNumberType, VectorDataConfig,
VectorStorageType,
};
use tempfile::Builder;
#[cfg(not(target_os = "windows"))]
mod prof;
const NUM_POINTS: usize = 10_000;
const NUM_VECTORS_PER_POINT: usize = 16;
const VECTOR_DIM: usize = 128;
const TOP: usize = 10;
fn multi_vector_search_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("multi-vector-search-group");
let stopped = AtomicBool::new(false);
let mut rnd = StdRng::seed_from_u64(42);
let segment_dir = Builder::new().prefix("data_dir").tempdir().unwrap();
let hnsw_dir = Builder::new().prefix("hnsw_dir").tempdir().unwrap();
let segment_config = SegmentConfig {
vector_data: HashMap::from([(
DEFAULT_VECTOR_NAME.to_owned(),
VectorDataConfig {
size: VECTOR_DIM,
distance: Dot,
storage_type: VectorStorageType::Memory,
index: Indexes::Plain {},
quantization_config: None,
multivector_config: Some(MultiVectorConfig::default()), // uses multivec config
datatype: None,
},
)]),
sparse_vector_data: Default::default(),
payload_storage_type: Default::default(),
};
let mut segment = build_segment(segment_dir.path(), &segment_config, true).unwrap();
for n in 0..NUM_POINTS {
let idx = (n as u64).into();
let multi_vec = random_multi_vector(&mut rnd, VECTOR_DIM, NUM_VECTORS_PER_POINT);
let named_vectors = only_default_multi_vector(&multi_vec);
segment
.upsert_point(n as SeqNumberType, idx, named_vectors)
.unwrap();
}
// build HNSW index
let hnsw_config = HnswConfig {
m: 8,
ef_construct: 16,
full_scan_threshold: 10, // low value to trigger index usage by default
max_indexing_threads: 0,
on_disk: None,
payload_m: None,
};
let permit_cpu_count = num_rayon_threads(hnsw_config.max_indexing_threads);
let permit = Arc::new(CpuPermit::dummy(permit_cpu_count as u32));
let vector_storage = &segment.vector_data[DEFAULT_VECTOR_NAME].vector_storage;
let quantized_vectors = &segment.vector_data[DEFAULT_VECTOR_NAME].quantized_vectors;
let hnsw_index = HNSWIndex::<GraphLinksRam>::open(HnswIndexOpenArgs {
path: hnsw_dir.path(),
id_tracker: segment.id_tracker.clone(),
vector_storage: vector_storage.clone(),
quantized_vectors: quantized_vectors.clone(),
payload_index: segment.payload_index.clone(),
hnsw_config,
permit: Some(permit),
stopped: &stopped,
})
.unwrap();
// intent: bench `search` without filter
group.bench_function("hnsw-multivec-search", |b| {
b.iter(|| {
// new query to avoid caching effects
let query = random_multi_vector(&mut rnd, VECTOR_DIM, NUM_VECTORS_PER_POINT).into();
let results = hnsw_index
.search(&[&query], None, TOP, None, &Default::default())
.unwrap();
assert_eq!(results[0].len(), TOP);
})
});
group.finish();
}
criterion_group! {
name = benches;
config = Criterion::default().with_profiler(prof::FlamegraphProfiler::new(100));
targets = multi_vector_search_benchmark
}
criterion_main!(benches);
|