File size: 4,288 Bytes
84d2a97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/// Number of points upserted into the first segment (point ids `0..NUM_VECTORS_1`).
const NUM_VECTORS_1: u64 = 300;
/// Number of points upserted into the second segment; their point ids are offset by
/// `NUM_VECTORS_1` so they do not collide with the first segment's ids.
const NUM_VECTORS_2: u64 = 500;

use std::sync::atomic::AtomicBool;

use common::cpu::CpuPermit;
use segment::data_types::named_vectors::NamedVectors;
use segment::entry::entry_point::SegmentEntry;
use segment::index::hnsw_index::num_rayon_threads;
use segment::segment::Segment;
use segment::segment_constructor::segment_builder::SegmentBuilder;
use segment::segment_constructor::simple_segment_constructor::build_multivec_segment;
use segment::types::Distance;
use segment::vector_storage::VectorStorage;
use tempfile::Builder;

/// Builds two multi-vector segments, removes individual named vectors and whole
/// points from the second one, merges both through `SegmentBuilder`, and checks
/// that the merged segment reports the expected point/vector counts and returns
/// the same vectors as the source segment for a sample of surviving points.
#[test]
fn test_rebuild_with_removed_vectors() {
    let storage_dir = Builder::new().prefix("segment_dir").tempdir().unwrap();
    let scratch_dir = Builder::new().prefix("segment_temp_dir").tempdir().unwrap();

    let stopped = AtomicBool::new(false);

    // NOTE(review): 4 and 6 presumably are the dimensions of "vector1" and "vector2";
    // they match the lengths of the vectors upserted below.
    let mut segment1 = build_multivec_segment(storage_dir.path(), 4, 6, Distance::Dot).unwrap();
    let mut segment2 = build_multivec_segment(storage_dir.path(), 4, 6, Distance::Dot).unwrap();

    // Every point of the first segment carries both named vectors.
    for i in 0..NUM_VECTORS_1 {
        let both = NamedVectors::from_pairs([
            ("vector1".to_owned(), vec![i as f32, 0., 0., 0.]),
            ("vector2".to_owned(), vec![0., i as f32, 0., 0., 0., 0.]),
        ]);
        segment1.upsert_point(1, i.into(), both).unwrap();
    }

    // In the second segment every fifth point is upserted without "vector2".
    for offset in 0..NUM_VECTORS_2 {
        let value = offset as f32;
        let first = ("vector1".to_owned(), vec![0., 0., value, 0.]);
        let vectors = if offset % 5 != 0 {
            let second = ("vector2".to_owned(), vec![0., 0., 0., value, 0., 0.]);
            NamedVectors::from_pairs([first, second])
        } else {
            NamedVectors::from_pairs([first])
        };

        segment2
            .upsert_point(1, (NUM_VECTORS_1 + offset).into(), vectors)
            .unwrap();
    }

    // Remove vectors first, then (for even offsets) the whole point.
    for offset in 0..NUM_VECTORS_2 {
        let id = NUM_VECTORS_1 + offset;

        // offset % 3 == 0 drops both vectors, == 1 drops only "vector2", == 2 drops none.
        let dropped: &[&str] = match offset % 3 {
            0 => &["vector1", "vector2"],
            1 => &["vector2"],
            _ => &[],
        };
        for &name in dropped {
            segment2.delete_vector(2, id.into(), name).unwrap();
        }

        if offset % 2 == 0 {
            segment2.delete_point(2, id.into()).unwrap();
        }
    }

    // Sample only odd offsets: points at even offsets were deleted above.
    let reference: Vec<_> = (0..20u64)
        .filter(|&offset| offset % 2 != 0)
        .map(|offset| {
            segment2
                .all_vectors((NUM_VECTORS_1 + offset).into())
                .unwrap()
        })
        .collect();

    let mut builder = SegmentBuilder::new(
        storage_dir.path(),
        scratch_dir.path(),
        &segment1.segment_config,
    )
    .unwrap();

    builder.update(&[&segment1, &segment2], &stopped).unwrap();

    let permit = CpuPermit::dummy(num_rayon_threads(0) as u32);

    let merged_segment: Segment = builder.build(permit, &stopped).unwrap();

    // All of segment1 plus the half of segment2 that was not removed by `delete_point`.
    assert_eq!(
        merged_segment.available_point_count(),
        (NUM_VECTORS_1 + NUM_VECTORS_2 / 2) as usize
    );

    // Reads the available-vector count of one named vector storage in the merged segment.
    let available_vectors = |name: &str| {
        merged_segment
            .vector_data
            .get(name)
            .unwrap()
            .vector_storage
            .borrow()
            .available_vector_count()
    };
    let vec1_count = available_vectors("vector1");
    let vec2_count = available_vectors("vector2");

    // The two named vectors were removed (or never inserted) for different sets of points.
    assert_ne!(vec1_count, vec2_count);

    let lower_bound = NUM_VECTORS_1 as usize;
    let upper_bound = NUM_VECTORS_1 as usize + NUM_VECTORS_2 as usize;
    assert!(vec1_count > lower_bound);
    assert!(vec2_count > lower_bound);
    assert!(vec1_count < upper_bound);
    assert!(vec2_count < upper_bound);

    // Same sample of surviving points, now read back from the merged segment.
    let merged_reference: Vec<_> = (0..20u64)
        .filter(|&offset| offset % 2 != 0)
        .map(|offset| {
            merged_segment
                .all_vectors((NUM_VECTORS_1 + offset).into())
                .unwrap()
        })
        .collect();

    for (merged, original) in merged_reference.iter().zip(reference.iter()) {
        assert_eq!(merged, original);
    }
}