use std::collections::{BTreeSet, HashMap, HashSet}; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; use common::tar_ext; use common::types::TelemetryDetail; use crate::common::operation_error::{OperationResult, SegmentFailedState}; use crate::data_types::facets::{FacetParams, FacetValue}; use crate::data_types::named_vectors::NamedVectors; use crate::data_types::order_by::{OrderBy, OrderValue}; use crate::data_types::query_context::{QueryContext, SegmentQueryContext}; use crate::data_types::vectors::{QueryVector, VectorInternal}; use crate::index::field_index::{CardinalityEstimation, FieldIndex}; use crate::json_path::JsonPath; use crate::telemetry::SegmentTelemetry; use crate::types::{ Filter, Payload, PayloadFieldSchema, PayloadKeyType, PayloadKeyTypeRef, PointIdType, ScoredPoint, SearchParams, SegmentConfig, SegmentInfo, SegmentType, SeqNumberType, SnapshotFormat, WithPayload, WithVector, }; /// Define all operations which can be performed with Segment or Segment-like entity. /// /// Assume all operations are idempotent - which means that no matter how many times an operation /// is executed - the storage state will be the same. pub trait SegmentEntry { /// Get current update version of the segment fn version(&self) -> SeqNumberType; /// Get version of specified point fn point_version(&self, point_id: PointIdType) -> Option; #[allow(clippy::too_many_arguments)] fn search_batch( &self, vector_name: &str, query_vectors: &[&QueryVector], with_payload: &WithPayload, with_vector: &WithVector, filter: Option<&Filter>, top: usize, params: Option<&SearchParams>, query_context: &SegmentQueryContext, ) -> OperationResult>>; fn upsert_point( &mut self, op_num: SeqNumberType, point_id: PointIdType, vectors: NamedVectors, ) -> OperationResult; fn delete_point( &mut self, op_num: SeqNumberType, point_id: PointIdType, ) -> OperationResult; fn update_vectors( &mut self, op_num: SeqNumberType, point_id: PointIdType, vectors: NamedVectors, ) -> OperationResult; fn delete_vector( &mut self, op_num: SeqNumberType, point_id: PointIdType, vector_name: &str, ) -> OperationResult; fn set_payload( &mut self, op_num: SeqNumberType, point_id: PointIdType, payload: &Payload, key: &Option, ) -> OperationResult; fn set_full_payload( &mut self, op_num: SeqNumberType, point_id: PointIdType, full_payload: &Payload, ) -> OperationResult; fn delete_payload( &mut self, op_num: SeqNumberType, point_id: PointIdType, key: PayloadKeyTypeRef, ) -> OperationResult; fn clear_payload( &mut self, op_num: SeqNumberType, point_id: PointIdType, ) -> OperationResult; fn vector( &self, vector_name: &str, point_id: PointIdType, ) -> OperationResult>; fn all_vectors(&self, point_id: PointIdType) -> OperationResult; /// Retrieve payload for the point /// If not found, return empty payload fn payload(&self, point_id: PointIdType) -> OperationResult; /// Iterator over all points in segment in ascending order. fn iter_points(&self) -> Box + '_>; /// Paginate over points which satisfies filtering condition starting with `offset` id including. /// /// Cancelled by `is_stopped` flag. fn read_filtered<'a>( &'a self, offset: Option, limit: Option, filter: Option<&'a Filter>, is_stopped: &AtomicBool, ) -> Vec; /// Return points which satisfies filtering condition ordered by the `order_by.key` field, /// starting with `order_by.start_from` value including. /// /// Will fail if there is no index for the order_by key. /// Cancelled by `is_stopped` flag. fn read_ordered_filtered<'a>( &'a self, limit: Option, filter: Option<&'a Filter>, order_by: &'a OrderBy, is_stopped: &AtomicBool, ) -> OperationResult>; /// Return random points which satisfies filtering condition. /// /// Cancelled by `is_stopped` flag. fn read_random_filtered( &self, limit: usize, filter: Option<&Filter>, is_stopped: &AtomicBool, ) -> Vec; /// Read points in [from; to) range fn read_range(&self, from: Option, to: Option) -> Vec; /// Return all unique values for the given key. fn unique_values( &self, key: &JsonPath, filter: Option<&Filter>, is_stopped: &AtomicBool, ) -> OperationResult>; /// Return the largest counts for the given facet request. fn facet( &self, request: &FacetParams, is_stopped: &AtomicBool, ) -> OperationResult>; /// Check if there is point with `point_id` in this segment. fn has_point(&self, point_id: PointIdType) -> bool; /// Estimate available point count in this segment for given filter. fn estimate_point_count<'a>(&'a self, filter: Option<&'a Filter>) -> CardinalityEstimation; fn vector_names(&self) -> HashSet; /// Number of available points /// /// - excludes soft deleted points fn available_point_count(&self) -> usize; /// Number of deleted points fn deleted_point_count(&self) -> usize; /// Size of all available vectors in storage fn available_vectors_size_in_bytes(&self, vector_name: &str) -> OperationResult; /// Max value from all `available_vectors_size_in_bytes` fn max_available_vectors_size_in_bytes(&self) -> OperationResult { self.vector_names() .into_iter() .map(|vector_name| self.available_vectors_size_in_bytes(&vector_name)) .collect::>>() .map(|sizes| sizes.into_iter().max().unwrap_or_default()) } /// Get segment type fn segment_type(&self) -> SegmentType; /// Get current stats of the segment fn info(&self) -> SegmentInfo; /// Get segment configuration fn config(&self) -> &SegmentConfig; /// Get current stats of the segment fn is_appendable(&self) -> bool; /// Flushes current segment state into a persistent storage, if possible /// if sync == true, block current thread while flushing /// /// Returns maximum version number which is guaranteed to be persisted. fn flush(&self, sync: bool, force: bool) -> OperationResult; /// Removes all persisted data and forces to destroy segment fn drop_data(self) -> OperationResult<()>; /// Path to data, owned by segment fn data_path(&self) -> PathBuf; /// Delete field index, if exists fn delete_field_index( &mut self, op_num: SeqNumberType, key: PayloadKeyTypeRef, ) -> OperationResult; /// Build the field index for the key and schema, if not built before. fn build_field_index( &self, op_num: SeqNumberType, key: PayloadKeyTypeRef, field_type: Option<&PayloadFieldSchema>, ) -> OperationResult)>>; /// Apply a built index. Returns whether it was actually applied or not. fn apply_field_index( &mut self, op_num: SeqNumberType, key: PayloadKeyType, field_schema: PayloadFieldSchema, field_index: Vec, ) -> OperationResult; /// Create index for a payload field, if not exists fn create_field_index( &mut self, op_num: SeqNumberType, key: PayloadKeyTypeRef, field_schema: Option<&PayloadFieldSchema>, ) -> OperationResult { let Some((schema, index)) = self.build_field_index(op_num, key, field_schema)? else { return Ok(false); }; self.apply_field_index(op_num, key.to_owned(), schema, index) } /// Get indexed fields fn get_indexed_fields(&self) -> HashMap; /// Checks if segment errored during last operations fn check_error(&self) -> Option; /// Delete points by the given filter fn delete_filtered<'a>( &'a mut self, op_num: SeqNumberType, filter: &'a Filter, ) -> OperationResult; /// Take a snapshot of the segment. /// /// Creates a tar archive of the segment directory into `snapshot_dir_path`. /// Uses `temp_path` to prepare files to archive. /// The `snapshotted_segments` set is used to avoid writing the same snapshot twice. fn take_snapshot( &self, temp_path: &Path, tar: &tar_ext::BuilderExt, format: SnapshotFormat, snapshotted_segments: &mut HashSet, ) -> OperationResult<()>; // Get collected telemetry data of segment fn get_telemetry_data(&self, detail: TelemetryDetail) -> SegmentTelemetry; fn fill_query_context(&self, query_context: &mut QueryContext); }