Spaces:
Build error
Build error
mod prof; | |
use std::borrow::Cow; | |
use std::sync::atomic::AtomicBool; | |
use std::sync::Arc; | |
use atomic_refcell::AtomicRefCell; | |
use common::types::PointOffsetType; | |
use criterion::{criterion_group, criterion_main, Criterion}; | |
use half::f16; | |
use rand::rngs::StdRng; | |
use rand::SeedableRng; | |
use segment::common::rocksdb_wrapper::{open_db, DB_VECTOR_CF}; | |
use segment::fixtures::payload_context_fixture::FixtureIdTracker; | |
use segment::index::sparse_index::sparse_index_config::{SparseIndexConfig, SparseIndexType}; | |
use segment::index::sparse_index::sparse_vector_index::{ | |
SparseVectorIndex, SparseVectorIndexOpenArgs, | |
}; | |
use segment::index::struct_payload_index::StructPayloadIndex; | |
use segment::index::VectorIndex; | |
use segment::payload_storage::in_memory_payload_storage::InMemoryPayloadStorage; | |
use segment::types::VectorStorageDatatype; | |
use segment::vector_storage::simple_sparse_vector_storage::open_simple_sparse_vector_storage; | |
use segment::vector_storage::VectorStorage; | |
use sparse::common::sparse_vector_fixture::random_sparse_vector; | |
use sparse::index::inverted_index::inverted_index_compressed_mmap::InvertedIndexCompressedMmap; | |
use sparse::index::inverted_index::inverted_index_mmap::InvertedIndexMmap; | |
use sparse::index::inverted_index::inverted_index_ram::InvertedIndexRam; | |
use sparse::index::inverted_index::InvertedIndex; | |
use tempfile::Builder; | |
const NUM_VECTORS: usize = 10_000; | |
const MAX_SPARSE_DIM: usize = 1_000; | |
fn sparse_vector_index_build_benchmark(c: &mut Criterion) { | |
let mut group = c.benchmark_group("sparse-vector-build-group"); | |
let stopped = AtomicBool::new(false); | |
let mut rnd = StdRng::seed_from_u64(42); | |
let payload_dir = Builder::new().prefix("payload_dir").tempdir().unwrap(); | |
let storage_dir = Builder::new().prefix("storage_dir").tempdir().unwrap(); | |
let index_dir = Builder::new().prefix("index_dir").tempdir().unwrap(); | |
// setup | |
let id_tracker = Arc::new(AtomicRefCell::new(FixtureIdTracker::new(NUM_VECTORS))); | |
let payload_storage = InMemoryPayloadStorage::default(); | |
let wrapped_payload_storage = Arc::new(AtomicRefCell::new(payload_storage.into())); | |
let payload_index = StructPayloadIndex::open( | |
wrapped_payload_storage, | |
id_tracker.clone(), | |
std::collections::HashMap::new(), | |
payload_dir.path(), | |
true, | |
) | |
.unwrap(); | |
let wrapped_payload_index = Arc::new(AtomicRefCell::new(payload_index)); | |
let db = open_db(storage_dir.path(), &[DB_VECTOR_CF]).unwrap(); | |
let mut vector_storage = open_simple_sparse_vector_storage(db, DB_VECTOR_CF, &stopped).unwrap(); | |
// add points to storage only once | |
for idx in 0..NUM_VECTORS { | |
let vec = &random_sparse_vector(&mut rnd, MAX_SPARSE_DIM); | |
vector_storage | |
.insert_vector(idx as PointOffsetType, vec.into()) | |
.unwrap(); | |
} | |
// save index config to disk | |
let index_config = SparseIndexConfig::new( | |
Some(10_000), | |
SparseIndexType::ImmutableRam, | |
Some(VectorStorageDatatype::Float32), | |
); | |
let vector_storage = Arc::new(AtomicRefCell::new(vector_storage)); | |
// intent: measure in-memory build time from storage | |
group.bench_function("build-ram-index", |b| { | |
b.iter(|| { | |
let sparse_vector_index: SparseVectorIndex<InvertedIndexRam> = | |
SparseVectorIndex::open(SparseVectorIndexOpenArgs { | |
config: index_config, | |
id_tracker: id_tracker.clone(), | |
vector_storage: vector_storage.clone(), | |
payload_index: wrapped_payload_index.clone(), | |
path: index_dir.path(), | |
stopped: &stopped, | |
tick_progress: || (), | |
}) | |
.unwrap(); | |
assert_eq!(sparse_vector_index.indexed_vector_count(), NUM_VECTORS); | |
}) | |
}); | |
// build once to reuse in mmap conversion benchmark | |
let sparse_vector_index: SparseVectorIndex<InvertedIndexRam> = | |
SparseVectorIndex::open(SparseVectorIndexOpenArgs { | |
config: index_config, | |
id_tracker, | |
vector_storage, | |
payload_index: wrapped_payload_index, | |
path: index_dir.path(), | |
stopped: &stopped, | |
tick_progress: || (), | |
}) | |
.unwrap(); | |
// intent: measure mmap conversion time | |
group.bench_function("convert-mmap-index", |b| { | |
b.iter(|| { | |
let mmap_index_dir = Builder::new().prefix("mmap_index_dir").tempdir().unwrap(); | |
let mmap_inverted_index = InvertedIndexMmap::from_ram_index( | |
Cow::Borrowed(sparse_vector_index.inverted_index()), | |
&mmap_index_dir, | |
) | |
.unwrap(); | |
assert_eq!(mmap_inverted_index.vector_count(), NUM_VECTORS); | |
}) | |
}); | |
group.bench_function("convert-mmap-index-f32", |b| { | |
b.iter(|| { | |
let mmap_index_dir = Builder::new().prefix("mmap_index_dir").tempdir().unwrap(); | |
let mmap_inverted_index = InvertedIndexCompressedMmap::<f32>::from_ram_index( | |
Cow::Borrowed(sparse_vector_index.inverted_index()), | |
&mmap_index_dir, | |
) | |
.unwrap(); | |
assert_eq!(mmap_inverted_index.vector_count(), NUM_VECTORS); | |
}) | |
}); | |
group.bench_function("convert-mmap-index-f16", |b| { | |
b.iter(|| { | |
let mmap_index_dir = Builder::new().prefix("mmap_index_dir").tempdir().unwrap(); | |
let mmap_inverted_index = InvertedIndexCompressedMmap::<f16>::from_ram_index( | |
Cow::Borrowed(sparse_vector_index.inverted_index()), | |
&mmap_index_dir, | |
) | |
.unwrap(); | |
assert_eq!(mmap_inverted_index.vector_count(), NUM_VECTORS); | |
}) | |
}); | |
group.finish(); | |
} | |
criterion_group! { | |
name = benches; | |
config = Criterion::default().with_profiler(prof::FlamegraphProfiler::new(100)); | |
targets = sparse_vector_index_build_benchmark | |
} | |
criterion_group! { | |
name = benches; | |
config = Criterion::default(); | |
targets = sparse_vector_index_build_benchmark, | |
} | |
criterion_main!(benches); | |