#![allow(deprecated)] use std::collections::HashMap; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use crate::types::{ Distance, HnswConfig, Indexes, PayloadStorageType, QuantizationConfig, SegmentConfig, SegmentState, SeqNumberType, VectorDataConfig, VectorStorageType, }; #[derive(Default, Debug, Deserialize, Serialize, JsonSchema, Clone)] #[serde(rename_all = "snake_case")] #[deprecated = "use SegmentConfig instead"] pub struct SegmentConfigV5 { pub vector_data: HashMap, /// Type of index used for search pub index: Indexes, /// Type of vector storage pub storage_type: StorageTypeV5, /// Defines payload storage type #[serde(default)] pub payload_storage_type: PayloadStorageType, /// Quantization parameters. If none - quantization is disabled. #[serde(default)] pub quantization_config: Option, } impl From for SegmentConfig { fn from(old_segment: SegmentConfigV5) -> Self { let vector_data = old_segment .vector_data .into_iter() .map(|(vector_name, old_data)| { let new_data = VectorDataConfig { size: old_data.size, distance: old_data.distance, // Use HNSW index if vector specific one is set, or fall back to segment index index: match old_data.hnsw_config { Some(hnsw_config) => Indexes::Hnsw(hnsw_config), None => old_segment.index.clone(), }, // Remove vector specific quantization config if no segment one is set // This is required because in some cases this was incorrectly set on the vector // level quantization_config: old_segment .quantization_config .as_ref() .and(old_data.quantization_config), // Mmap if explicitly on disk, otherwise convert old storage type storage_type: (old_data.on_disk == Some(true)) .then_some(VectorStorageType::Mmap) .unwrap_or_else(|| old_segment.storage_type.into()), multivector_config: None, datatype: None, }; (vector_name, new_data) }) .collect(); SegmentConfig { vector_data, sparse_vector_data: Default::default(), payload_storage_type: old_segment.payload_storage_type, } } } /// Type of vector storage #[derive(Default, Debug, Deserialize, Serialize, JsonSchema, Copy, Clone, PartialEq, Eq)] #[serde(rename_all = "snake_case")] #[serde(tag = "type", content = "options")] #[deprecated] pub enum StorageTypeV5 { // Store vectors in memory and use persistence storage only if vectors are changed #[default] InMemory, // Use memmap to store vectors, a little slower than `InMemory`, but requires little RAM Mmap, } impl From for VectorStorageType { fn from(old: StorageTypeV5) -> Self { match old { StorageTypeV5::InMemory => Self::Memory, StorageTypeV5::Mmap => Self::Mmap, } } } /// Config of single vector data storage #[derive(Debug, Deserialize, Serialize, JsonSchema, Clone)] #[serde(rename_all = "snake_case")] #[deprecated = "use VectorDataConfig instead"] pub struct VectorDataConfigV5 { /// Size of a vectors used pub size: usize, /// Type of distance function used for measuring distance between vectors pub distance: Distance, /// Vector specific HNSW config that overrides collection config #[serde(default)] pub hnsw_config: Option, /// Vector specific quantization config that overrides collection config #[serde(default)] pub quantization_config: Option, /// If true - vectors will not be stored in memory. /// Instead, it will store vectors on mmap-files. /// If enabled, search performance will defined by disk speed /// and fraction of vectors that fit in RAM. #[serde(default)] #[serde(skip_serializing_if = "Option::is_none")] pub on_disk: Option, } #[derive(Debug, Deserialize, Clone)] #[serde(rename_all = "snake_case")] #[deprecated = "use SegmentState instead"] pub struct SegmentStateV5 { pub version: Option, pub config: SegmentConfigV5, } impl From for SegmentState { fn from(old: SegmentStateV5) -> Self { Self { version: old.version, config: old.config.into(), } } } #[cfg(test)] mod tests { use super::*; use crate::types::{ScalarQuantization, ScalarQuantizationConfig}; #[test] fn convert_from_v5_to_newest() { let old_segment = SegmentConfigV5 { vector_data: vec![ ( "vec1".to_string(), VectorDataConfigV5 { size: 10, distance: Distance::Dot, hnsw_config: Some(HnswConfig { m: 20, ef_construct: 100, full_scan_threshold: 10000, max_indexing_threads: 0, on_disk: None, payload_m: Some(10), }), quantization_config: None, on_disk: None, }, ), ( "vec2".to_string(), VectorDataConfigV5 { size: 10, distance: Distance::Dot, hnsw_config: None, quantization_config: Some(QuantizationConfig::Scalar(ScalarQuantization { scalar: ScalarQuantizationConfig { r#type: Default::default(), quantile: Some(0.99), always_ram: Some(true), }, })), on_disk: None, }, ), ] .into_iter() .collect(), index: Indexes::Hnsw(HnswConfig { m: 25, ef_construct: 120, full_scan_threshold: 10000, max_indexing_threads: 0, on_disk: None, payload_m: None, }), storage_type: StorageTypeV5::InMemory, payload_storage_type: PayloadStorageType::default(), quantization_config: None, }; let new_segment: SegmentConfig = old_segment.into(); eprintln!("new = {new_segment:#?}"); match &new_segment.vector_data.get("vec1").unwrap().index { Indexes::Plain { .. } => panic!("expected HNSW index"), Indexes::Hnsw(hnsw) => { assert_eq!(hnsw.m, 20); } } match &new_segment.vector_data.get("vec2").unwrap().index { Indexes::Plain { .. } => panic!("expected HNSW index"), Indexes::Hnsw(hnsw) => { assert_eq!(hnsw.m, 25); } } if new_segment .vector_data .get("vec1") .unwrap() .quantization_config .is_some() { panic!("expected no quantization"); } } #[test] fn convert_from_v5_to_newest_2() { let old_segment = SegmentConfigV5 { vector_data: vec![ ( "vec1".to_string(), VectorDataConfigV5 { size: 10, distance: Distance::Dot, hnsw_config: None, quantization_config: None, on_disk: None, }, ), ( "vec2".to_string(), VectorDataConfigV5 { size: 10, distance: Distance::Dot, hnsw_config: None, quantization_config: Some(QuantizationConfig::Scalar(ScalarQuantization { scalar: ScalarQuantizationConfig { r#type: Default::default(), quantile: Some(0.99), always_ram: Some(true), }, })), on_disk: None, }, ), ] .into_iter() .collect(), index: Indexes::Hnsw(HnswConfig { m: 25, ef_construct: 120, full_scan_threshold: 10000, max_indexing_threads: 0, on_disk: None, payload_m: None, }), storage_type: StorageTypeV5::InMemory, payload_storage_type: PayloadStorageType::default(), quantization_config: Some(QuantizationConfig::Scalar(ScalarQuantization { scalar: ScalarQuantizationConfig { r#type: Default::default(), quantile: Some(0.95), always_ram: Some(true), }, })), }; let new_segment: SegmentConfig = old_segment.into(); eprintln!("new = {new_segment:#?}"); if new_segment .vector_data .get("vec1") .unwrap() .quantization_config .is_some() { panic!("expected no quantization"); } match &new_segment .vector_data .get("vec2") .unwrap() .quantization_config { Some(q) => match q { QuantizationConfig::Scalar(scalar) => { assert_eq!(scalar.scalar.quantile, Some(0.99)); } QuantizationConfig::Product(_) => { panic!("expected scalar quantization") } QuantizationConfig::Binary(_) => { panic!("expected scalar quantization") } }, _ => { panic!("expected quantization") } } } }