Gouzi Mohaled
Ajout du dossier lib
84d2a97
use std::collections::HashSet;
use std::path::Path;
use parking_lot::RwLock;
use rand::rngs::ThreadRng;
use rand::Rng;
use segment::data_types::named_vectors::NamedVectors;
use segment::data_types::vectors::only_default_vector;
use segment::entry::entry_point::SegmentEntry;
use segment::segment::Segment;
use segment::segment_constructor::simple_segment_constructor::{
build_multivec_segment, build_simple_segment,
};
use segment::types::{Distance, Payload, PointIdType, SeqNumberType};
use serde_json::json;
use crate::collection_manager::holders::segment_holder::SegmentHolder;
use crate::collection_manager::optimizers::indexing_optimizer::IndexingOptimizer;
use crate::collection_manager::optimizers::merge_optimizer::MergeOptimizer;
use crate::collection_manager::optimizers::segment_optimizer::OptimizerThresholds;
use crate::config::CollectionParams;
use crate::operations::types::VectorsConfig;
use crate::operations::vector_params_builder::VectorParamsBuilder;
/// Builds a simple segment at `path` for 4-dimensional vectors using dot-product distance.
///
/// This function does not insert any points into the segment.
///
/// # Panics
///
/// Panics if the underlying segment constructor returns an error.
pub fn empty_segment(path: &Path) -> Segment {
    let dim = 4;
    let built = build_simple_segment(path, dim, Distance::Dot);
    built.unwrap()
}
/// A generator for random point IDs
///
/// Holds its own random number generator together with the set of numeric IDs
/// that `unique` has already returned, so that a single generator never returns
/// the same ID from `unique` twice.
#[derive(Default)]
pub(crate) struct PointIdGenerator {
/// Random source that `random` draws new IDs from.
thread_rng: ThreadRng,
/// Numeric IDs already returned by `unique`.
used: HashSet<u64>,
}
impl PointIdGenerator {
    /// Draws a random numeric point ID from the range `1..u64::MAX`.
    ///
    /// No uniqueness bookkeeping happens here, so repeated calls may return the same ID.
    #[inline]
    pub fn random(&mut self) -> PointIdType {
        let raw: u64 = self.thread_rng.gen_range(1..u64::MAX);
        raw.into()
    }

    /// Draws a random point ID that this generator has not returned from `unique` before.
    ///
    /// Every returned numeric ID is recorded in `used`.
    ///
    /// # Panics
    ///
    /// Panics if 100 000 consecutive draws all fail to produce an unused ID.
    #[inline]
    pub fn unique(&mut self) -> PointIdType {
        for _attempt in 0..100_000 {
            let candidate = self.random();
            match candidate {
                // `insert` yields `true` only when the value was not in the set yet.
                PointIdType::NumId(raw) if self.used.insert(raw) => return candidate,
                // Already used (or not a numeric ID): draw again.
                _ => {}
            }
        }
        panic!("failed to generate unique point ID after 100000 attempts");
    }
}
/// Builds a two-vector segment at `path` and fills it with `num_vectors` random points.
///
/// Each point gets:
/// - a vector named `"vector1"` with `dim1` components and a vector named `"vector2"`
///   with `dim2` components, every component drawn from `0.0..1.0`;
/// - a point ID obtained from a fresh [`PointIdGenerator`] via `unique`;
/// - a payload with key `"number"` (a one-element list holding an integer drawn from
///   `1..1_000`) and key `"keyword"` (the string `keyword_N`, `N` drawn from `1..10`).
///
/// All operations are applied with the sequence number `opnum`.
///
/// # Panics
///
/// Panics if building the segment, upserting a point or setting a payload fails.
pub fn random_multi_vec_segment(
    path: &Path,
    opnum: SeqNumberType,
    num_vectors: u64,
    dim1: usize,
    dim2: usize,
) -> Segment {
    let mut ids = PointIdGenerator::default();
    let mut segment = build_multivec_segment(path, dim1, dim2, Distance::Dot).unwrap();
    let mut rng = rand::thread_rng();

    for _ in 0..num_vectors {
        let first: Vec<_> = (0..dim1).map(|_| rng.gen_range(0.0..1.0)).collect();
        let second: Vec<_> = (0..dim2).map(|_| rng.gen_range(0.0..1.0)).collect();

        let mut named = NamedVectors::default();
        named.insert("vector1".to_owned(), first.into());
        named.insert("vector2".to_owned(), second.into());

        let id: PointIdType = ids.unique();
        let number = rng.gen_range(1..1_000);
        let keyword = format!("keyword_{}", rng.gen_range(1..10));
        let payload: Payload = json!({ "number": vec![number], "keyword": keyword }).into();

        segment.upsert_point(opnum, id, named).unwrap();
        segment.set_payload(opnum, id, &payload, &None).unwrap();
    }

    segment
}
/// Builds a simple segment at `path` and fills it with `num_vectors` random points.
///
/// Each point gets a `dim`-component default vector with every component drawn from
/// `0.0..1.0`, a point ID obtained from a fresh [`PointIdGenerator`] via `unique`, and a
/// payload whose `"number"` key holds a one-element list with an integer drawn from
/// `1..1_000`. All operations are applied with the sequence number `opnum`.
///
/// # Panics
///
/// Panics if building the segment, upserting a point or setting a payload fails.
pub fn random_segment(path: &Path, opnum: SeqNumberType, num_vectors: u64, dim: usize) -> Segment {
    let mut ids = PointIdGenerator::default();
    let mut segment = build_simple_segment(path, dim, Distance::Dot).unwrap();
    let mut rng = rand::thread_rng();

    for _ in 0..num_vectors {
        let components: Vec<_> = (0..dim).map(|_| rng.gen_range(0.0..1.0)).collect();
        let id: PointIdType = ids.unique();
        let number = rng.gen_range(1..1_000);
        let payload: Payload = json!({ "number": vec![number] }).into();

        let vectors = only_default_vector(&components);
        segment.upsert_point(opnum, id, vectors).unwrap();
        segment.set_payload(opnum, id, &payload, &None).unwrap();
    }

    segment
}
pub fn build_segment_1(path: &Path) -> Segment {
let mut segment1 = empty_segment(path);
let vec1 = vec![1.0, 0.0, 1.0, 1.0];
let vec2 = vec![1.0, 0.0, 1.0, 0.0];
let vec3 = vec![1.0, 1.0, 1.0, 1.0];
let vec4 = vec![1.0, 1.0, 0.0, 1.0];
let vec5 = vec![1.0, 0.0, 0.0, 0.0];
segment1
.upsert_point(1, 1.into(), only_default_vector(&vec1))
.unwrap();
segment1
.upsert_point(2, 2.into(), only_default_vector(&vec2))
.unwrap();
segment1
.upsert_point(3, 3.into(), only_default_vector(&vec3))
.unwrap();
segment1
.upsert_point(4, 4.into(), only_default_vector(&vec4))
.unwrap();
segment1
.upsert_point(5, 5.into(), only_default_vector(&vec5))
.unwrap();
let payload_key = "color";
let payload_option1: Payload = json!({ payload_key: vec!["red".to_owned()] }).into();
let payload_option2: Payload =
json!({ payload_key: vec!["red".to_owned(), "blue".to_owned()] }).into();
let payload_option3: Payload = json!({ payload_key: vec!["blue".to_owned()] }).into();
segment1
.set_payload(6, 1.into(), &payload_option1, &None)
.unwrap();
segment1
.set_payload(6, 2.into(), &payload_option1, &None)
.unwrap();
segment1
.set_payload(6, 3.into(), &payload_option3, &None)
.unwrap();
segment1
.set_payload(6, 4.into(), &payload_option2, &None)
.unwrap();
segment1
.set_payload(6, 5.into(), &payload_option2, &None)
.unwrap();
segment1
}
pub fn build_segment_2(path: &Path) -> Segment {
let mut segment2 = empty_segment(path);
let vec4 = vec![1.0, 1.0, 0.0, 1.0];
let vec5 = vec![1.0, 0.0, 0.0, 0.0];
let vec11 = vec![1.0, 1.0, 1.0, 1.0];
let vec12 = vec![1.0, 1.0, 1.0, 0.0];
let vec13 = vec![1.0, 0.0, 1.0, 1.0];
let vec14 = vec![1.0, 0.0, 0.0, 1.0];
let vec15 = vec![1.0, 1.0, 0.0, 0.0];
segment2
.upsert_point(7, 4.into(), only_default_vector(&vec4))
.unwrap();
segment2
.upsert_point(8, 5.into(), only_default_vector(&vec5))
.unwrap();
segment2
.upsert_point(11, 11.into(), only_default_vector(&vec11))
.unwrap();
segment2
.upsert_point(12, 12.into(), only_default_vector(&vec12))
.unwrap();
segment2
.upsert_point(13, 13.into(), only_default_vector(&vec13))
.unwrap();
segment2
.upsert_point(14, 14.into(), only_default_vector(&vec14))
.unwrap();
segment2
.upsert_point(15, 15.into(), only_default_vector(&vec15))
.unwrap();
segment2
}
/// Creates a [`SegmentHolder`] containing the segments from [`build_segment_1`] and
/// [`build_segment_2`], wrapped in an `RwLock`.
///
/// Both segments are built with the same `path` and are added to the holder in that order.
pub fn build_test_holder(path: &Path) -> RwLock<SegmentHolder> {
    // Build both segments up front, before the holder exists.
    let segments = [build_segment_1(path), build_segment_2(path)];

    let mut holder = SegmentHolder::default();
    for segment in segments {
        // The ID assigned by the holder is not needed here.
        let _ = holder.add_new(segment);
    }
    RwLock::new(holder)
}
/// Constructs a [`MergeOptimizer`] over `segment_path` and `collection_temp_dir`.
///
/// The collection is configured with a single vector of `dim` components using dot-product
/// distance; every other collection parameter comes from `CollectionParams::empty()`.
///
/// When `optimizer_thresholds` is `None`, the thresholds fall back to
/// `max_segment_size_kb = 100_000`, `memmap_threshold_kb = 1_000_000` and
/// `indexing_threshold_kb = 1_000_000`. The two trailing constructor arguments are left at
/// their `Default` values.
pub(crate) fn get_merge_optimizer(
    segment_path: &Path,
    collection_temp_dir: &Path,
    dim: usize,
    optimizer_thresholds: Option<OptimizerThresholds>,
) -> MergeOptimizer {
    let thresholds = optimizer_thresholds.unwrap_or_else(|| OptimizerThresholds {
        max_segment_size_kb: 100_000,
        memmap_threshold_kb: 1_000_000,
        indexing_threshold_kb: 1_000_000,
    });

    let vector_params = VectorParamsBuilder::new(dim as u64, Distance::Dot).build();
    let collection_params = CollectionParams {
        vectors: VectorsConfig::Single(vector_params),
        ..CollectionParams::empty()
    };

    MergeOptimizer::new(
        // NOTE(review): meaning of this leading `5` is defined by `MergeOptimizer::new` — confirm there.
        5,
        thresholds,
        segment_path.to_owned(),
        collection_temp_dir.to_owned(),
        collection_params,
        Default::default(),
        Default::default(),
    )
}
/// Constructs an [`IndexingOptimizer`] over `segment_path` and `collection_temp_dir`.
///
/// The collection is configured with a single vector of `dim` components using dot-product
/// distance; every other collection parameter comes from `CollectionParams::empty()`.
///
/// Thresholds are fixed at `max_segment_size_kb = 100_000`, `memmap_threshold_kb = 100` and
/// `indexing_threshold_kb = 100`. The two trailing constructor arguments are left at their
/// `Default` values.
pub(crate) fn get_indexing_optimizer(
    segment_path: &Path,
    collection_temp_dir: &Path,
    dim: usize,
) -> IndexingOptimizer {
    let thresholds = OptimizerThresholds {
        max_segment_size_kb: 100_000,
        memmap_threshold_kb: 100,
        indexing_threshold_kb: 100,
    };

    let vector_params = VectorParamsBuilder::new(dim as u64, Distance::Dot).build();
    let collection_params = CollectionParams {
        vectors: VectorsConfig::Single(vector_params),
        ..CollectionParams::empty()
    };

    IndexingOptimizer::new(
        // NOTE(review): meaning of this leading `2` is defined by `IndexingOptimizer::new` — confirm there.
        2,
        thresholds,
        segment_path.to_owned(),
        collection_temp_dir.to_owned(),
        collection_params,
        Default::default(),
        Default::default(),
    )
}