Gouzi Mohaled
Ajout du dossier lib
84d2a97
use std::fmt;
use std::path::PathBuf;
use bitvec::prelude::BitSlice;
use common::types::PointOffsetType;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use super::in_memory_id_tracker::InMemoryIdTracker;
use crate::common::operation_error::OperationResult;
use crate::common::Flusher;
use crate::id_tracker::immutable_id_tracker::ImmutableIdTracker;
use crate::id_tracker::simple_id_tracker::SimpleIdTracker;
use crate::types::{PointIdType, SeqNumberType};
/// Fixed seed for the random number generator used when sampling point IDs.
///
/// Seeding the generator with a constant makes sampling reproducible, so search results don't
/// show randomness or 'inconsistencies' which would otherwise be introduced by HNSW/ID tracker
/// point sampling.
const SEED: u64 = 0b1011000011011110001110010101001010001011001101001010010001111010;
/// Trait for point ids tracker.
///
/// This tracker is used to convert external (i.e. user-facing) point id into internal point id
/// as well as for keeping track on point version
/// Internal ids are useful for contiguous-ness
pub trait IdTracker: fmt::Debug {
fn internal_version(&self, internal_id: PointOffsetType) -> Option<SeqNumberType>;
fn set_internal_version(
&mut self,
internal_id: PointOffsetType,
version: SeqNumberType,
) -> OperationResult<()>;
/// Returns internal ID of the point, which is used inside this segment
fn internal_id(&self, external_id: PointIdType) -> Option<PointOffsetType>;
/// Return external ID for internal point, defined by user
fn external_id(&self, internal_id: PointOffsetType) -> Option<PointIdType>;
/// Set mapping
fn set_link(
&mut self,
external_id: PointIdType,
internal_id: PointOffsetType,
) -> OperationResult<()>;
/// Drop mapping
fn drop(&mut self, external_id: PointIdType) -> OperationResult<()>;
/// Iterate over all external IDs
///
/// Count should match `available_point_count`.
fn iter_external(&self) -> Box<dyn Iterator<Item = PointIdType> + '_>;
/// Iterate over all internal IDs
///
/// Count should match `total_point_count`.
fn iter_internal(&self) -> Box<dyn Iterator<Item = PointOffsetType> + '_>;
/// Iterate starting from a given ID
fn iter_from(
&self,
external_id: Option<PointIdType>,
) -> Box<dyn Iterator<Item = (PointIdType, PointOffsetType)> + '_>;
/// Iterate over internal IDs (offsets)
///
/// - excludes removed points
fn iter_ids(&self) -> Box<dyn Iterator<Item = PointOffsetType> + '_>;
fn iter_random(&self) -> Box<dyn Iterator<Item = (PointIdType, PointOffsetType)> + '_>;
/// Iterate over internal IDs (offsets)
///
/// - excludes removed points
/// - excludes flagged items from `exclude_bitslice`
fn iter_ids_excluding<'a>(
&'a self,
exclude_bitslice: &'a BitSlice,
) -> Box<dyn Iterator<Item = PointOffsetType> + '_> {
Box::new(self.iter_ids().filter(|point| {
!exclude_bitslice
.get(*point as usize)
.as_deref()
.copied()
.unwrap_or(false)
}))
}
/// Flush id mapping to disk
fn mapping_flusher(&self) -> Flusher;
/// Flush points versions to disk
fn versions_flusher(&self) -> Flusher;
/// Number of total points
///
/// - includes soft deleted points
fn total_point_count(&self) -> usize;
/// Number of available points
///
/// - excludes soft deleted points
fn available_point_count(&self) -> usize {
self.total_point_count() - self.deleted_point_count()
}
/// Number of deleted points
fn deleted_point_count(&self) -> usize;
/// Get [`BitSlice`] representation for deleted points with deletion flags
///
/// The size of this slice is not guaranteed. It may be smaller/larger than the number of
/// vectors in this segment.
fn deleted_point_bitslice(&self) -> &BitSlice;
/// Check whether the given point is soft deleted
fn is_deleted_point(&self, internal_id: PointOffsetType) -> bool;
fn name(&self) -> &'static str;
/// Iterator over `n` random IDs which are not deleted
///
/// A [`BitSlice`] of deleted vectors may optionally be given to also consider deleted named
/// vectors.
fn sample_ids<'a>(
&'a self,
deleted_vector_bitslice: Option<&'a BitSlice>,
) -> Box<dyn Iterator<Item = PointOffsetType> + '_> {
// Use seeded randomness, prevents 'inconsistencies' in search results with sampling
let mut rng = StdRng::seed_from_u64(SEED);
let total = self.total_point_count() as PointOffsetType;
Box::new(
(0..total)
.map(move |_| rng.gen_range(0..total))
.filter(move |x| {
// Check for deleted vector first, as that is more likely
!deleted_vector_bitslice
.and_then(|d| d.get(*x as usize).as_deref().copied())
.unwrap_or(false)
// Also check point deletion for integrity
&& !self.is_deleted_point(*x)
}),
)
}
/// Finds inconsistencies between id mapping and versions storage.
/// It might happen that point doesn't have version due to un-flushed WAL.
/// This method makes those points usable again.
fn cleanup_versions(&mut self) -> OperationResult<()>;
fn files(&self) -> Vec<PathBuf>;
}
/// Trait-object form of [`IdTracker`] which is additionally `Sync` and `Send`.
pub type IdTrackerSS = dyn IdTracker + Sync + Send;
/// Closed set of concrete [`IdTracker`] implementations behind a single type.
///
/// The [`IdTracker`] implementation of this enum matches on the variant and forwards each
/// method it implements to the wrapped tracker.
#[derive(Debug)]
pub enum IdTrackerEnum {
/// Wraps a [`SimpleIdTracker`].
MutableIdTracker(SimpleIdTracker),
/// Wraps an [`ImmutableIdTracker`].
ImmutableIdTracker(ImmutableIdTracker),
/// Wraps an [`InMemoryIdTracker`].
InMemoryIdTracker(InMemoryIdTracker),
}
/// Every method below is pure delegation: it matches on the variant and forwards the call,
/// with unchanged arguments, to the wrapped tracker.
impl IdTracker for IdTrackerEnum {
    /// Forwards `internal_version` to the wrapped tracker.
    fn internal_version(&self, internal_id: PointOffsetType) -> Option<SeqNumberType> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.internal_version(internal_id),
            Self::ImmutableIdTracker(tracker) => tracker.internal_version(internal_id),
            Self::InMemoryIdTracker(tracker) => tracker.internal_version(internal_id),
        }
    }

    /// Forwards `set_internal_version` to the wrapped tracker.
    fn set_internal_version(
        &mut self,
        internal_id: PointOffsetType,
        version: SeqNumberType,
    ) -> OperationResult<()> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.set_internal_version(internal_id, version),
            Self::ImmutableIdTracker(tracker) => {
                tracker.set_internal_version(internal_id, version)
            }
            Self::InMemoryIdTracker(tracker) => tracker.set_internal_version(internal_id, version),
        }
    }

    /// Forwards `internal_id` to the wrapped tracker.
    fn internal_id(&self, external_id: PointIdType) -> Option<PointOffsetType> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.internal_id(external_id),
            Self::ImmutableIdTracker(tracker) => tracker.internal_id(external_id),
            Self::InMemoryIdTracker(tracker) => tracker.internal_id(external_id),
        }
    }

    /// Forwards `external_id` to the wrapped tracker.
    fn external_id(&self, internal_id: PointOffsetType) -> Option<PointIdType> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.external_id(internal_id),
            Self::ImmutableIdTracker(tracker) => tracker.external_id(internal_id),
            Self::InMemoryIdTracker(tracker) => tracker.external_id(internal_id),
        }
    }

    /// Forwards `set_link` to the wrapped tracker.
    fn set_link(
        &mut self,
        external_id: PointIdType,
        internal_id: PointOffsetType,
    ) -> OperationResult<()> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.set_link(external_id, internal_id),
            Self::ImmutableIdTracker(tracker) => tracker.set_link(external_id, internal_id),
            Self::InMemoryIdTracker(tracker) => tracker.set_link(external_id, internal_id),
        }
    }

    /// Forwards `drop` to the wrapped tracker.
    fn drop(&mut self, external_id: PointIdType) -> OperationResult<()> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.drop(external_id),
            Self::ImmutableIdTracker(tracker) => tracker.drop(external_id),
            Self::InMemoryIdTracker(tracker) => tracker.drop(external_id),
        }
    }

    /// Forwards `iter_external` to the wrapped tracker.
    fn iter_external(&self) -> Box<dyn Iterator<Item = PointIdType> + '_> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.iter_external(),
            Self::ImmutableIdTracker(tracker) => tracker.iter_external(),
            Self::InMemoryIdTracker(tracker) => tracker.iter_external(),
        }
    }

    /// Forwards `iter_internal` to the wrapped tracker.
    fn iter_internal(&self) -> Box<dyn Iterator<Item = PointOffsetType> + '_> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.iter_internal(),
            Self::ImmutableIdTracker(tracker) => tracker.iter_internal(),
            Self::InMemoryIdTracker(tracker) => tracker.iter_internal(),
        }
    }

    /// Forwards `iter_from` to the wrapped tracker.
    fn iter_from(
        &self,
        external_id: Option<PointIdType>,
    ) -> Box<dyn Iterator<Item = (PointIdType, PointOffsetType)> + '_> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.iter_from(external_id),
            Self::ImmutableIdTracker(tracker) => tracker.iter_from(external_id),
            Self::InMemoryIdTracker(tracker) => tracker.iter_from(external_id),
        }
    }

    /// Forwards `iter_ids` to the wrapped tracker.
    fn iter_ids(&self) -> Box<dyn Iterator<Item = PointOffsetType> + '_> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.iter_ids(),
            Self::ImmutableIdTracker(tracker) => tracker.iter_ids(),
            Self::InMemoryIdTracker(tracker) => tracker.iter_ids(),
        }
    }

    /// Forwards `iter_random` to the wrapped tracker.
    fn iter_random(&self) -> Box<dyn Iterator<Item = (PointIdType, PointOffsetType)> + '_> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.iter_random(),
            Self::ImmutableIdTracker(tracker) => tracker.iter_random(),
            Self::InMemoryIdTracker(tracker) => tracker.iter_random(),
        }
    }

    /// Forwards `mapping_flusher` to the wrapped tracker.
    fn mapping_flusher(&self) -> Flusher {
        match self {
            Self::MutableIdTracker(tracker) => tracker.mapping_flusher(),
            Self::ImmutableIdTracker(tracker) => tracker.mapping_flusher(),
            Self::InMemoryIdTracker(tracker) => tracker.mapping_flusher(),
        }
    }

    /// Forwards `versions_flusher` to the wrapped tracker.
    fn versions_flusher(&self) -> Flusher {
        match self {
            Self::MutableIdTracker(tracker) => tracker.versions_flusher(),
            Self::ImmutableIdTracker(tracker) => tracker.versions_flusher(),
            Self::InMemoryIdTracker(tracker) => tracker.versions_flusher(),
        }
    }

    /// Forwards `total_point_count` to the wrapped tracker.
    fn total_point_count(&self) -> usize {
        match self {
            Self::MutableIdTracker(tracker) => tracker.total_point_count(),
            Self::ImmutableIdTracker(tracker) => tracker.total_point_count(),
            Self::InMemoryIdTracker(tracker) => tracker.total_point_count(),
        }
    }

    /// Forwards `deleted_point_count` to the wrapped tracker.
    fn deleted_point_count(&self) -> usize {
        match self {
            Self::MutableIdTracker(tracker) => tracker.deleted_point_count(),
            Self::ImmutableIdTracker(tracker) => tracker.deleted_point_count(),
            Self::InMemoryIdTracker(tracker) => tracker.deleted_point_count(),
        }
    }

    /// Forwards `deleted_point_bitslice` to the wrapped tracker.
    fn deleted_point_bitslice(&self) -> &BitSlice {
        match self {
            Self::MutableIdTracker(tracker) => tracker.deleted_point_bitslice(),
            Self::ImmutableIdTracker(tracker) => tracker.deleted_point_bitslice(),
            Self::InMemoryIdTracker(tracker) => tracker.deleted_point_bitslice(),
        }
    }

    /// Forwards `is_deleted_point` to the wrapped tracker.
    fn is_deleted_point(&self, internal_id: PointOffsetType) -> bool {
        match self {
            Self::MutableIdTracker(tracker) => tracker.is_deleted_point(internal_id),
            Self::ImmutableIdTracker(tracker) => tracker.is_deleted_point(internal_id),
            Self::InMemoryIdTracker(tracker) => tracker.is_deleted_point(internal_id),
        }
    }

    /// Forwards `name` to the wrapped tracker.
    fn name(&self) -> &'static str {
        match self {
            Self::MutableIdTracker(tracker) => tracker.name(),
            Self::ImmutableIdTracker(tracker) => tracker.name(),
            Self::InMemoryIdTracker(tracker) => tracker.name(),
        }
    }

    /// Forwards `cleanup_versions` to the wrapped tracker.
    fn cleanup_versions(&mut self) -> OperationResult<()> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.cleanup_versions(),
            Self::ImmutableIdTracker(tracker) => tracker.cleanup_versions(),
            Self::InMemoryIdTracker(tracker) => tracker.cleanup_versions(),
        }
    }

    /// Forwards `files` to the wrapped tracker.
    fn files(&self) -> Vec<PathBuf> {
        match self {
            Self::MutableIdTracker(tracker) => tracker.files(),
            Self::ImmutableIdTracker(tracker) => tracker.files(),
            Self::InMemoryIdTracker(tracker) => tracker.files(),
        }
    }
}