File size: 3,430 Bytes
84d2a97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
use std::path::Path;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;

use atomic_refcell::AtomicRefCell;
use common::types::PointOffsetType;
use criterion::{criterion_group, criterion_main, Criterion};
use rand::distributions::Standard;
use rand::Rng;
use segment::common::rocksdb_wrapper::{open_db, DB_VECTOR_CF};
use segment::data_types::vectors::{DenseVector, VectorInternal, VectorRef};
use segment::fixtures::payload_context_fixture::FixtureIdTracker;
use segment::id_tracker::IdTrackerSS;
use segment::types::Distance;
use segment::vector_storage::dense::simple_dense_vector_storage::open_simple_dense_vector_storage;
use segment::vector_storage::{new_raw_scorer, VectorStorage, VectorStorageEnum};
use tempfile::Builder;

/// Number of random vectors inserted into the storage under test; also the
/// exclusive upper bound for the random point ids drawn by the random-access benchmark.
const NUM_VECTORS: usize = 100_000;

/// Dimensionality of every generated vector.
/// The larger the dimensionality, the greater the SIMD advantage.
const DIM: usize = 1024;

/// Produces a dense vector of `size` elements, each sampled from rand's
/// `Standard` distribution using the thread-local generator.
fn random_vector(size: usize) -> DenseVector {
    rand::thread_rng()
        .sample_iter(Standard)
        .take(size)
        .collect()
}

fn init_vector_storage(
    path: &Path,
    dim: usize,
    num: usize,
    dist: Distance,
) -> (VectorStorageEnum, Arc<AtomicRefCell<IdTrackerSS>>) {
    let db = open_db(path, &[DB_VECTOR_CF]).unwrap();
    let id_tracker = Arc::new(AtomicRefCell::new(FixtureIdTracker::new(num)));
    let mut storage =
        open_simple_dense_vector_storage(db, DB_VECTOR_CF, dim, dist, &AtomicBool::new(false))
            .unwrap();
    {
        for i in 0..num {
            let vector: VectorInternal = random_vector(dim).into();
            storage
                .insert_vector(i as PointOffsetType, VectorRef::from(&vector))
                .unwrap();
        }
    }

    (storage, id_tracker)
}

/// Benchmark group "storage-score-all": every iteration generates a random query,
/// builds a raw scorer over the whole storage and asks it for the top 10 results.
///
/// Query generation and scorer construction happen inside the measured closure,
/// so they are part of the reported time.
fn benchmark_naive(c: &mut Criterion) {
    // The temporary directory must outlive the storage that is opened inside it.
    let storage_dir = Builder::new().prefix("storage_dir").tempdir().unwrap();

    let (storage, id_tracker) =
        init_vector_storage(storage_dir.path(), DIM, NUM_VECTORS, Distance::Dot);
    let tracker_guard = id_tracker.borrow();

    let mut group = c.benchmark_group("storage-score-all");

    group.bench_function("storage vector search", |bencher| {
        bencher.iter(|| {
            let raw_query = random_vector(DIM);
            let query = raw_query.as_slice().into();
            let deleted_points = tracker_guard.deleted_point_bitslice();
            let scorer = new_raw_scorer(query, &storage, deleted_points).unwrap();
            // Returned from the closure so the measured work produces a used value.
            scorer.peek_top_all(10)
        })
    });
}

/// Benchmark group "storage-score-random": a single scorer is built once for one
/// random query, and every iteration scores one randomly chosen point id in
/// `0..NUM_VECTORS`.
///
/// The scores are summed and printed to stderr after the benchmark finishes.
fn random_access_benchmark(c: &mut Criterion) {
    // The temporary directory must outlive the storage that is opened inside it.
    let storage_dir = Builder::new().prefix("storage_dir").tempdir().unwrap();

    let (storage, id_tracker) =
        init_vector_storage(storage_dir.path(), DIM, NUM_VECTORS, Distance::Dot);
    let tracker_guard = id_tracker.borrow();

    let mut group = c.benchmark_group("storage-score-random");

    // The query and scorer are created once, outside the measured closure.
    let raw_query = random_vector(DIM);
    let query = raw_query.as_slice().into();
    let deleted_points = tracker_guard.deleted_point_bitslice();
    let scorer = new_raw_scorer(query, &storage, deleted_points).unwrap();

    let mut total_score = 0.;
    group.bench_function("storage vector search", |bencher| {
        bencher.iter(|| {
            let mut rng = rand::thread_rng();
            let point_id = rng.gen_range(0..NUM_VECTORS) as PointOffsetType;
            total_score += scorer.score_point(point_id);
        })
    });
    eprintln!("total_score = {total_score:?}");
}

// Register both benchmark functions under the `benches` group and hand that
// group to Criterion's `criterion_main!` harness macro.
criterion_group!(benches, benchmark_naive, random_access_benchmark);
criterion_main!(benches);