File size: 6,279 Bytes
84d2a97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
// Profiling support module. It is excluded from Windows builds, where the benchmark
// group at the bottom of this file uses the default Criterion configuration instead.
#[cfg(not(target_os = "windows"))]
mod prof;

use std::borrow::Cow;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;

use atomic_refcell::AtomicRefCell;
use common::types::PointOffsetType;
use criterion::{criterion_group, criterion_main, Criterion};
use half::f16;
use rand::rngs::StdRng;
use rand::SeedableRng;
use segment::common::rocksdb_wrapper::{open_db, DB_VECTOR_CF};
use segment::fixtures::payload_context_fixture::FixtureIdTracker;
use segment::index::sparse_index::sparse_index_config::{SparseIndexConfig, SparseIndexType};
use segment::index::sparse_index::sparse_vector_index::{
    SparseVectorIndex, SparseVectorIndexOpenArgs,
};
use segment::index::struct_payload_index::StructPayloadIndex;
use segment::index::VectorIndex;
use segment::payload_storage::in_memory_payload_storage::InMemoryPayloadStorage;
use segment::types::VectorStorageDatatype;
use segment::vector_storage::simple_sparse_vector_storage::open_simple_sparse_vector_storage;
use segment::vector_storage::VectorStorage;
use sparse::common::sparse_vector_fixture::random_sparse_vector;
use sparse::index::inverted_index::inverted_index_compressed_mmap::InvertedIndexCompressedMmap;
use sparse::index::inverted_index::inverted_index_mmap::InvertedIndexMmap;
use sparse::index::inverted_index::inverted_index_ram::InvertedIndexRam;
use sparse::index::inverted_index::InvertedIndex;
use tempfile::Builder;

/// Number of random sparse vectors inserted into the storage. The benchmarks assert that
/// every built or converted index reports exactly this many vectors.
const NUM_VECTORS: usize = 10_000;
/// Second argument passed to `random_sparse_vector` for every generated vector.
/// NOTE(review): presumably an upper bound on the sparse vector's dimensions — confirm
/// against the fixture.
const MAX_SPARSE_DIM: usize = 1_000;

fn sparse_vector_index_build_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("sparse-vector-build-group");

    let stopped = AtomicBool::new(false);
    let mut rnd = StdRng::seed_from_u64(42);

    let payload_dir = Builder::new().prefix("payload_dir").tempdir().unwrap();
    let storage_dir = Builder::new().prefix("storage_dir").tempdir().unwrap();
    let index_dir = Builder::new().prefix("index_dir").tempdir().unwrap();

    // setup
    let id_tracker = Arc::new(AtomicRefCell::new(FixtureIdTracker::new(NUM_VECTORS)));
    let payload_storage = InMemoryPayloadStorage::default();
    let wrapped_payload_storage = Arc::new(AtomicRefCell::new(payload_storage.into()));
    let payload_index = StructPayloadIndex::open(
        wrapped_payload_storage,
        id_tracker.clone(),
        std::collections::HashMap::new(),
        payload_dir.path(),
        true,
    )
    .unwrap();
    let wrapped_payload_index = Arc::new(AtomicRefCell::new(payload_index));

    let db = open_db(storage_dir.path(), &[DB_VECTOR_CF]).unwrap();
    let mut vector_storage = open_simple_sparse_vector_storage(db, DB_VECTOR_CF, &stopped).unwrap();

    // add points to storage only once
    for idx in 0..NUM_VECTORS {
        let vec = &random_sparse_vector(&mut rnd, MAX_SPARSE_DIM);
        vector_storage
            .insert_vector(idx as PointOffsetType, vec.into())
            .unwrap();
    }

    // save index config to disk
    let index_config = SparseIndexConfig::new(
        Some(10_000),
        SparseIndexType::ImmutableRam,
        Some(VectorStorageDatatype::Float32),
    );

    let vector_storage = Arc::new(AtomicRefCell::new(vector_storage));

    // intent: measure in-memory build time from storage
    group.bench_function("build-ram-index", |b| {
        b.iter(|| {
            let sparse_vector_index: SparseVectorIndex<InvertedIndexRam> =
                SparseVectorIndex::open(SparseVectorIndexOpenArgs {
                    config: index_config,
                    id_tracker: id_tracker.clone(),
                    vector_storage: vector_storage.clone(),
                    payload_index: wrapped_payload_index.clone(),
                    path: index_dir.path(),
                    stopped: &stopped,
                    tick_progress: || (),
                })
                .unwrap();
            assert_eq!(sparse_vector_index.indexed_vector_count(), NUM_VECTORS);
        })
    });

    // build once to reuse in mmap conversion benchmark
    let sparse_vector_index: SparseVectorIndex<InvertedIndexRam> =
        SparseVectorIndex::open(SparseVectorIndexOpenArgs {
            config: index_config,
            id_tracker,
            vector_storage,
            payload_index: wrapped_payload_index,
            path: index_dir.path(),
            stopped: &stopped,
            tick_progress: || (),
        })
        .unwrap();

    // intent: measure mmap conversion time
    group.bench_function("convert-mmap-index", |b| {
        b.iter(|| {
            let mmap_index_dir = Builder::new().prefix("mmap_index_dir").tempdir().unwrap();
            let mmap_inverted_index = InvertedIndexMmap::from_ram_index(
                Cow::Borrowed(sparse_vector_index.inverted_index()),
                &mmap_index_dir,
            )
            .unwrap();
            assert_eq!(mmap_inverted_index.vector_count(), NUM_VECTORS);
        })
    });

    group.bench_function("convert-mmap-index-f32", |b| {
        b.iter(|| {
            let mmap_index_dir = Builder::new().prefix("mmap_index_dir").tempdir().unwrap();
            let mmap_inverted_index = InvertedIndexCompressedMmap::<f32>::from_ram_index(
                Cow::Borrowed(sparse_vector_index.inverted_index()),
                &mmap_index_dir,
            )
            .unwrap();
            assert_eq!(mmap_inverted_index.vector_count(), NUM_VECTORS);
        })
    });

    group.bench_function("convert-mmap-index-f16", |b| {
        b.iter(|| {
            let mmap_index_dir = Builder::new().prefix("mmap_index_dir").tempdir().unwrap();
            let mmap_inverted_index = InvertedIndexCompressedMmap::<f16>::from_ram_index(
                Cow::Borrowed(sparse_vector_index.inverted_index()),
                &mmap_index_dir,
            )
            .unwrap();
            assert_eq!(mmap_inverted_index.vector_count(), NUM_VECTORS);
        })
    });

    group.finish();
}

// Non-Windows targets: register the benchmark with the flamegraph profiler from the local
// `prof` module (which is itself only compiled for non-Windows targets).
// NOTE(review): the `100` passed to `FlamegraphProfiler::new` is presumably a sampling
// frequency — confirm in `prof`.
#[cfg(not(target_os = "windows"))]
criterion_group! {
    name = benches;
    config = Criterion::default().with_profiler(prof::FlamegraphProfiler::new(100));
    targets = sparse_vector_index_build_benchmark
}

// Windows targets: same benchmark group, but with the default Criterion configuration
// because the `prof` module is not available there.
#[cfg(target_os = "windows")]
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets = sparse_vector_index_build_benchmark,
}

// Generates the benchmark binary's entry point for the `benches` group defined above.
criterion_main!(benches);