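// NOTE(review): stray non-Rust text ("Spaces: Build error") was found here —
// presumably page residue from wherever this file was copied; confirm and delete.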
/// Number of points the test in this file upserts into its first segment.
const NUM_VECTORS_1: u64 = 300;
/// Number of points the test in this file upserts into its second segment
/// (before any deletions are applied).
const NUM_VECTORS_2: u64 = 500;
use std::sync::atomic::AtomicBool;

use common::cpu::CpuPermit;
use segment::data_types::named_vectors::NamedVectors;
use segment::entry::entry_point::SegmentEntry;
use segment::index::hnsw_index::num_rayon_threads;
use segment::segment::Segment;
use segment::segment_constructor::segment_builder::SegmentBuilder;
use segment::segment_constructor::simple_segment_constructor::build_multivec_segment;
use segment::types::Distance;
use segment::vector_storage::VectorStorage;
use tempfile::Builder;
/// Checks that merging two multi-vector segments through `SegmentBuilder`
/// carries over both per-vector and per-point deletions.
///
/// * `segment1` receives `NUM_VECTORS_1` points, each with `vector1` and `vector2`.
/// * `segment2` receives `NUM_VECTORS_2` points (ids offset by `NUM_VECTORS_1`);
///   every fifth one is upserted without `vector2`. Afterwards some named
///   vectors are deleted and every even-offset point is deleted entirely.
///
/// The merged segment must then report `NUM_VECTORS_1 + NUM_VECTORS_2 / 2`
/// available points, different available counts for the two named vectors,
/// and the same vectors as `segment2` for a sample of surviving points.
#[test]
fn test_rebuild_with_removed_vectors() {
    let dir = Builder::new().prefix("segment_dir").tempdir().unwrap();
    let temp_dir = Builder::new().prefix("segment_temp_dir").tempdir().unwrap();

    let stopped = AtomicBool::new(false);

    // `4` and `6` match the lengths of the `vector1` / `vector2` payloads below
    // (presumably the two vector dimensions — confirm against the constructor).
    let mut segment1 = build_multivec_segment(dir.path(), 4, 6, Distance::Dot).unwrap();
    let mut segment2 = build_multivec_segment(dir.path(), 4, 6, Distance::Dot).unwrap();

    // First segment: every point carries both named vectors.
    for i in 0..NUM_VECTORS_1 {
        segment1
            .upsert_point(
                1,
                i.into(),
                NamedVectors::from_pairs([
                    ("vector1".to_string(), vec![i as f32, 0., 0., 0.]),
                    ("vector2".to_string(), vec![0., i as f32, 0., 0., 0., 0.]),
                ]),
            )
            .unwrap();
    }

    // Second segment: every fifth point is upserted with `vector1` only.
    for i in 0..NUM_VECTORS_2 {
        let vectors = if i % 5 == 0 {
            NamedVectors::from_pairs([("vector1".to_string(), vec![0., 0., i as f32, 0.])])
        } else {
            NamedVectors::from_pairs([
                ("vector1".to_string(), vec![0., 0., i as f32, 0.]),
                ("vector2".to_string(), vec![0., 0., 0., i as f32, 0., 0.]),
            ])
        };
        segment2
            .upsert_point(1, (NUM_VECTORS_1 + i).into(), vectors)
            .unwrap();
    }

    // Apply a mix of deletions to the second segment.
    for i in 0..NUM_VECTORS_2 {
        let point_id = NUM_VECTORS_1 + i;

        // Offsets divisible by 3 lose both named vectors but keep the point.
        if i % 3 == 0 {
            segment2
                .delete_vector(2, point_id.into(), "vector1")
                .unwrap();
            segment2
                .delete_vector(2, point_id.into(), "vector2")
                .unwrap();
        }
        // Offsets with remainder 1 lose only `vector2`.
        if i % 3 == 1 {
            segment2
                .delete_vector(2, point_id.into(), "vector2")
                .unwrap();
        }
        // Even offsets are removed as whole points.
        if i % 2 == 0 {
            segment2.delete_point(2, point_id.into()).unwrap();
        }
    }

    // Ids of the sampled points: odd offsets below 20, i.e. points of the
    // second segment that were not removed by the even-offset point deletion.
    let sample_ids = || {
        (0..20_u64)
            .filter(|offset| offset % 2 != 0)
            .map(|offset| NUM_VECTORS_1 + offset)
    };

    // Snapshot of the sampled points' vectors before the merge.
    let reference: Vec<_> = sample_ids()
        .map(|id| segment2.all_vectors(id.into()).unwrap())
        .collect();

    let mut builder =
        SegmentBuilder::new(dir.path(), temp_dir.path(), &segment1.segment_config).unwrap();

    builder.update(&[&segment1, &segment2], &stopped).unwrap();

    let permit_cpu_count = num_rayon_threads(0);
    let permit = CpuPermit::dummy(permit_cpu_count as u32);

    let merged_segment: Segment = builder.build(permit, &stopped).unwrap();

    // All of segment1's points plus the odd-offset half of segment2's points.
    let merged_points_count = merged_segment.available_point_count();
    assert_eq!(
        merged_points_count,
        (NUM_VECTORS_1 + NUM_VECTORS_2 / 2) as usize
    );

    // Available vector count of one named vector storage in the merged segment.
    let available_count = |name: &str| {
        merged_segment
            .vector_data
            .get(name)
            .unwrap()
            .vector_storage
            .borrow()
            .available_vector_count()
    };
    let vec1_count = available_count("vector1");
    let vec2_count = available_count("vector2");

    // The two named vectors were deleted in different patterns, so their
    // counts must differ and lie strictly between "segment1 only" and "all".
    assert_ne!(vec1_count, vec2_count);

    assert!(vec1_count > NUM_VECTORS_1 as usize);
    assert!(vec2_count > NUM_VECTORS_1 as usize);
    assert!(vec1_count < NUM_VECTORS_1 as usize + NUM_VECTORS_2 as usize);
    assert!(vec2_count < NUM_VECTORS_1 as usize + NUM_VECTORS_2 as usize);

    // The same sample, read back from the merged segment.
    let merged_reference: Vec<_> = sample_ids()
        .map(|id| merged_segment.all_vectors(id.into()).unwrap())
        .collect();

    // Comparing the whole vectors also catches a length mismatch, which an
    // index loop bounded by one side's length would not.
    assert_eq!(merged_reference, reference);
}