Spaces:
Build error
Build error
File size: 4,288 Bytes
84d2a97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
/// Number of points upserted into the first segment of the test; also used as
/// the id offset for the points of the second segment.
const NUM_VECTORS_1: u64 = 300;
/// Number of points upserted into the second segment of the test, before any
/// vectors or points are deleted from it.
const NUM_VECTORS_2: u64 = 500;
use std::sync::atomic::AtomicBool;
use common::cpu::CpuPermit;
use segment::data_types::named_vectors::NamedVectors;
use segment::entry::entry_point::SegmentEntry;
use segment::index::hnsw_index::num_rayon_threads;
use segment::segment::Segment;
use segment::segment_constructor::segment_builder::SegmentBuilder;
use segment::segment_constructor::simple_segment_constructor::build_multivec_segment;
use segment::types::Distance;
use segment::vector_storage::VectorStorage;
use tempfile::Builder;
/// Merges two multi-vector segments with `SegmentBuilder` after vectors and
/// points were removed from the second one, then checks the merged segment.
///
/// Setup:
/// * `segment1` receives `NUM_VECTORS_1` points (ids `0..NUM_VECTORS_1`), each
///   with both `vector1` and `vector2`.
/// * `segment2` receives `NUM_VECTORS_2` points (ids offset by
///   `NUM_VECTORS_1`); every fifth point is upserted without `vector2`.
/// * In `segment2`, for offset `i`: `i % 3 == 0` deletes both vectors,
///   `i % 3 == 1` deletes `vector2`, and every even `i` deletes the whole point.
///
/// Checks:
/// * the merged segment reports `NUM_VECTORS_1 + NUM_VECTORS_2 / 2` available
///   points (only the odd offsets of `segment2` are not deleted);
/// * the available counts of `vector1` and `vector2` differ and both lie
///   strictly between `NUM_VECTORS_1` and `NUM_VECTORS_1 + NUM_VECTORS_2`;
/// * for a sample of non-deleted `segment2` points, `all_vectors` returns the
///   same value from the merged segment as it did from `segment2`.
#[test]
fn test_rebuild_with_removed_vectors() {
    // Exclusive upper bound of the id offsets sampled from `segment2`. Only the
    // odd offsets are sampled, because even offsets are deleted as whole points.
    const SAMPLE_END: u64 = 20;

    let dir = Builder::new().prefix("segment_dir").tempdir().unwrap();
    let temp_dir = Builder::new().prefix("segment_temp_dir").tempdir().unwrap();

    let stopped = AtomicBool::new(false);

    // Both source segments are created under the same directory.
    let mut segment1 = build_multivec_segment(dir.path(), 4, 6, Distance::Dot).unwrap();
    let mut segment2 = build_multivec_segment(dir.path(), 4, 6, Distance::Dot).unwrap();

    // The first argument of the upsert/delete calls is `1` for every insert and
    // `2` for every deletion — presumably the operation sequence number.
    for i in 0..NUM_VECTORS_1 {
        segment1
            .upsert_point(
                1,
                i.into(),
                NamedVectors::from_pairs([
                    ("vector1".to_string(), vec![i as f32, 0., 0., 0.]),
                    ("vector2".to_string(), vec![0., i as f32, 0., 0., 0., 0.]),
                ]),
            )
            .unwrap();
    }

    for i in 0..NUM_VECTORS_2 {
        // Every fifth point is stored without `vector2`.
        let vectors = if i % 5 == 0 {
            NamedVectors::from_pairs([("vector1".to_string(), vec![0., 0., i as f32, 0.])])
        } else {
            NamedVectors::from_pairs([
                ("vector1".to_string(), vec![0., 0., i as f32, 0.]),
                ("vector2".to_string(), vec![0., 0., 0., i as f32, 0., 0.]),
            ])
        };
        segment2
            .upsert_point(1, (NUM_VECTORS_1 + i).into(), vectors)
            .unwrap();
    }

    for i in 0..NUM_VECTORS_2 {
        let point_id = NUM_VECTORS_1 + i;

        // Per-vector deletions first, in the same order as before.
        match i % 3 {
            0 => {
                segment2
                    .delete_vector(2, point_id.into(), "vector1")
                    .unwrap();
                segment2
                    .delete_vector(2, point_id.into(), "vector2")
                    .unwrap();
            }
            1 => {
                segment2
                    .delete_vector(2, point_id.into(), "vector2")
                    .unwrap();
            }
            _ => {}
        }

        // Whole-point deletion for every even offset, independent of the above.
        if i % 2 == 0 {
            segment2.delete_point(2, point_id.into()).unwrap();
        }
    }

    // Ids of the sampled points: odd offsets below `SAMPLE_END`, shifted into
    // the id range of `segment2`. Used for both the source and merged segment
    // so the two samples are guaranteed to cover the same points.
    let sampled_ids = || {
        (1..SAMPLE_END)
            .step_by(2)
            .map(|offset| NUM_VECTORS_1 + offset)
    };

    let reference: Vec<_> = sampled_ids()
        .map(|id| segment2.all_vectors(id.into()).unwrap())
        .collect();

    let mut builder =
        SegmentBuilder::new(dir.path(), temp_dir.path(), &segment1.segment_config).unwrap();

    builder.update(&[&segment1, &segment2], &stopped).unwrap();

    let permit_cpu_count = num_rayon_threads(0);
    let permit = CpuPermit::dummy(permit_cpu_count as u32);

    let merged_segment: Segment = builder.build(permit, &stopped).unwrap();

    // All of `segment1` plus the odd-offset half of `segment2`.
    let merged_points_count = merged_segment.available_point_count();
    assert_eq!(
        merged_points_count,
        (NUM_VECTORS_1 + NUM_VECTORS_2 / 2) as usize
    );

    // Available vector count of one named vector storage in the merged segment.
    let available_count = |vector_name: &str| {
        merged_segment
            .vector_data
            .get(vector_name)
            .unwrap()
            .vector_storage
            .borrow()
            .available_vector_count()
    };
    let vec1_count = available_count("vector1");
    let vec2_count = available_count("vector2");

    let lower_bound = NUM_VECTORS_1 as usize;
    let upper_bound = NUM_VECTORS_1 as usize + NUM_VECTORS_2 as usize;

    // The two named vectors were removed on different sets of points, so their
    // counts are expected to differ while staying within the overall bounds.
    assert_ne!(vec1_count, vec2_count);

    assert!(vec1_count > lower_bound);
    assert!(vec2_count > lower_bound);
    assert!(vec1_count < upper_bound);
    assert!(vec2_count < upper_bound);

    let merged_reference: Vec<_> = sampled_ids()
        .map(|id| merged_segment.all_vectors(id.into()).unwrap())
        .collect();

    // Compares both length and every element, unlike an index-based loop.
    assert_eq!(merged_reference, reference);
}
|