// Gouzi Mohaled
// Ajout du dossier lib ("Add the lib folder")
// 84d2a97
use std::collections::{BTreeSet, HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use common::tar_ext;
use common::types::TelemetryDetail;
use crate::common::operation_error::{OperationResult, SegmentFailedState};
use crate::data_types::facets::{FacetParams, FacetValue};
use crate::data_types::named_vectors::NamedVectors;
use crate::data_types::order_by::{OrderBy, OrderValue};
use crate::data_types::query_context::{QueryContext, SegmentQueryContext};
use crate::data_types::vectors::{QueryVector, VectorInternal};
use crate::index::field_index::{CardinalityEstimation, FieldIndex};
use crate::json_path::JsonPath;
use crate::telemetry::SegmentTelemetry;
use crate::types::{
Filter, Payload, PayloadFieldSchema, PayloadKeyType, PayloadKeyTypeRef, PointIdType,
ScoredPoint, SearchParams, SegmentConfig, SegmentInfo, SegmentType, SeqNumberType,
SnapshotFormat, WithPayload, WithVector,
};
/// Operations available on a segment, or on anything that can stand in for one.
///
/// All operations are assumed to be idempotent: however many times the same operation is
/// executed, the storage ends up in the same state.
pub trait SegmentEntry {
    /// Returns the current update version of the segment.
    fn version(&self) -> SeqNumberType;

    /// Returns the version of the point `point_id`.
    ///
    /// NOTE(review): `None` presumably means the point is unknown to this segment — confirm
    /// against the implementors.
    fn point_version(&self, point_id: PointIdType) -> Option<SeqNumberType>;

    /// Runs a batch of search queries against the vector named `vector_name`.
    ///
    /// NOTE(review): the shape of the result is not spelled out here; presumably one inner
    /// `Vec` of at most `top` scored points per entry of `query_vectors` — confirm.
    // Lint opt-out: the signature takes more arguments than clippy accepts by default.
    #[allow(clippy::too_many_arguments)]
    fn search_batch(
        &self,
        vector_name: &str,
        query_vectors: &[&QueryVector],
        with_payload: &WithPayload,
        with_vector: &WithVector,
        filter: Option<&Filter>,
        top: usize,
        params: Option<&SearchParams>,
        query_context: &SegmentQueryContext,
    ) -> OperationResult<Vec<Vec<ScoredPoint>>>;

    /// Upserts the point `point_id` with `vectors` as operation `op_num`.
    ///
    /// NOTE(review): the meaning of the returned flag is defined by the implementors — confirm.
    fn upsert_point(
        &mut self,
        op_num: SeqNumberType,
        point_id: PointIdType,
        vectors: NamedVectors,
    ) -> OperationResult<bool>;

    /// Deletes the point `point_id` as operation `op_num`.
    ///
    /// NOTE(review): the meaning of the returned flag is defined by the implementors — confirm.
    fn delete_point(
        &mut self,
        op_num: SeqNumberType,
        point_id: PointIdType,
    ) -> OperationResult<bool>;

    /// Updates the vectors of the point `point_id` with `vectors` as operation `op_num`.
    ///
    /// NOTE(review): the meaning of the returned flag is defined by the implementors — confirm.
    fn update_vectors(
        &mut self,
        op_num: SeqNumberType,
        point_id: PointIdType,
        vectors: NamedVectors,
    ) -> OperationResult<bool>;

    /// Deletes the vector named `vector_name` of the point `point_id` as operation `op_num`.
    ///
    /// NOTE(review): the meaning of the returned flag is defined by the implementors — confirm.
    fn delete_vector(
        &mut self,
        op_num: SeqNumberType,
        point_id: PointIdType,
        vector_name: &str,
    ) -> OperationResult<bool>;

    /// Sets `payload` on the point `point_id` as operation `op_num`.
    ///
    /// NOTE(review): `key` presumably selects the JSON path the payload is written under —
    /// confirm. The meaning of the returned flag is defined by the implementors.
    fn set_payload(
        &mut self,
        op_num: SeqNumberType,
        point_id: PointIdType,
        payload: &Payload,
        key: &Option<JsonPath>,
    ) -> OperationResult<bool>;

    /// Sets `full_payload` as the payload of the point `point_id` as operation `op_num`.
    ///
    /// NOTE(review): presumably replaces any previous payload as a whole — confirm. The
    /// meaning of the returned flag is defined by the implementors.
    fn set_full_payload(
        &mut self,
        op_num: SeqNumberType,
        point_id: PointIdType,
        full_payload: &Payload,
    ) -> OperationResult<bool>;

    /// Deletes the payload entry `key` of the point `point_id` as operation `op_num`.
    ///
    /// NOTE(review): the meaning of the returned flag is defined by the implementors — confirm.
    fn delete_payload(
        &mut self,
        op_num: SeqNumberType,
        point_id: PointIdType,
        key: PayloadKeyTypeRef,
    ) -> OperationResult<bool>;

    /// Clears the payload of the point `point_id` as operation `op_num`.
    ///
    /// NOTE(review): the meaning of the returned flag is defined by the implementors — confirm.
    fn clear_payload(
        &mut self,
        op_num: SeqNumberType,
        point_id: PointIdType,
    ) -> OperationResult<bool>;

    /// Looks up the vector named `vector_name` of the point `point_id`.
    ///
    /// NOTE(review): `Ok(None)` presumably means the point has no such vector — confirm.
    fn vector(
        &self,
        vector_name: &str,
        point_id: PointIdType,
    ) -> OperationResult<Option<VectorInternal>>;

    /// Returns the named vectors of the point `point_id`.
    fn all_vectors(&self, point_id: PointIdType) -> OperationResult<NamedVectors>;

    /// Returns the payload stored for the point `point_id`.
    ///
    /// An empty payload is returned when none is found.
    fn payload(&self, point_id: PointIdType) -> OperationResult<Payload>;

    /// Iterates over every point of the segment, in ascending order.
    fn iter_points(&self) -> Box<dyn Iterator<Item = PointIdType> + '_>;

    /// Pages through the points matching `filter`, starting at the id `offset` (inclusive).
    ///
    /// Raising the `is_stopped` flag cancels the read.
    fn read_filtered<'a>(
        &'a self,
        offset: Option<PointIdType>,
        limit: Option<usize>,
        filter: Option<&'a Filter>,
        is_stopped: &AtomicBool,
    ) -> Vec<PointIdType>;

    /// Returns the points matching `filter`, ordered by the `order_by.key` field and starting
    /// at the `order_by.start_from` value (inclusive).
    ///
    /// Fails when there is no index for the `order_by` key.
    /// Raising the `is_stopped` flag cancels the read.
    fn read_ordered_filtered<'a>(
        &'a self,
        limit: Option<usize>,
        filter: Option<&'a Filter>,
        order_by: &'a OrderBy,
        is_stopped: &AtomicBool,
    ) -> OperationResult<Vec<(OrderValue, PointIdType)>>;

    /// Returns random points matching `filter`.
    ///
    /// Raising the `is_stopped` flag cancels the read.
    fn read_random_filtered(
        &self,
        limit: usize,
        filter: Option<&Filter>,
        is_stopped: &AtomicBool,
    ) -> Vec<PointIdType>;

    /// Reads the points of the half-open range `[from; to)`.
    fn read_range(&self, from: Option<PointIdType>, to: Option<PointIdType>) -> Vec<PointIdType>;

    /// Returns every unique value found under `key`.
    fn unique_values(
        &self,
        key: &JsonPath,
        filter: Option<&Filter>,
        is_stopped: &AtomicBool,
    ) -> OperationResult<BTreeSet<FacetValue>>;

    /// Returns the largest counts for the facet `request`.
    fn facet(
        &self,
        request: &FacetParams,
        is_stopped: &AtomicBool,
    ) -> OperationResult<HashMap<FacetValue, usize>>;

    /// Tells whether this segment holds a point with the id `point_id`.
    fn has_point(&self, point_id: PointIdType) -> bool;

    /// Estimates how many available points of this segment match `filter`.
    fn estimate_point_count<'a>(&'a self, filter: Option<&'a Filter>) -> CardinalityEstimation;

    /// Returns the vector names of this segment.
    ///
    /// The default `max_available_vectors_size_in_bytes` asks for the size of each of them.
    fn vector_names(&self) -> HashSet<String>;

    /// Number of available points.
    ///
    /// - soft deleted points are not counted
    fn available_point_count(&self) -> usize;

    /// Number of deleted points.
    fn deleted_point_count(&self) -> usize;

    /// Size in bytes of all available vectors stored under `vector_name`.
    fn available_vectors_size_in_bytes(&self, vector_name: &str) -> OperationResult<usize>;

    /// Largest `available_vectors_size_in_bytes` over all `vector_names`.
    ///
    /// Yields `0` when the segment reports no vector names. The first size lookup that fails
    /// ends the scan and its error is returned.
    fn max_available_vectors_size_in_bytes(&self) -> OperationResult<usize> {
        self.vector_names()
            .into_iter()
            .try_fold(0usize, |largest, vector_name| {
                self.available_vectors_size_in_bytes(&vector_name)
                    .map(|size| largest.max(size))
            })
    }

    /// Returns the type of the segment.
    fn segment_type(&self) -> SegmentType;

    /// Returns the current stats of the segment.
    fn info(&self) -> SegmentInfo;

    /// Returns the configuration of the segment.
    fn config(&self) -> &SegmentConfig;

    /// Tells whether the segment is appendable.
    fn is_appendable(&self) -> bool;

    /// Flushes the current segment state into persistent storage, if possible.
    ///
    /// With `sync == true` the calling thread blocks while flushing.
    /// NOTE(review): the effect of `force` is not described here — confirm with implementors.
    ///
    /// Returns the highest version number that is guaranteed to be persisted.
    fn flush(&self, sync: bool, force: bool) -> OperationResult<SeqNumberType>;

    /// Removes all persisted data and forces the segment to be destroyed.
    fn drop_data(self) -> OperationResult<()>;

    /// Path to the data owned by the segment.
    fn data_path(&self) -> PathBuf;

    /// Deletes the field index of `key`, if there is one.
    fn delete_field_index(
        &mut self,
        op_num: SeqNumberType,
        key: PayloadKeyTypeRef,
    ) -> OperationResult<bool>;

    /// Builds the field index for `key` and the given schema, unless it was built before.
    ///
    /// Only takes `&self`: the built index is handed back to the caller rather than installed,
    /// see `apply_field_index`.
    fn build_field_index(
        &self,
        op_num: SeqNumberType,
        key: PayloadKeyTypeRef,
        field_type: Option<&PayloadFieldSchema>,
    ) -> OperationResult<Option<(PayloadFieldSchema, Vec<FieldIndex>)>>;

    /// Applies an already built index. The returned flag tells whether it was actually applied.
    fn apply_field_index(
        &mut self,
        op_num: SeqNumberType,
        key: PayloadKeyType,
        field_schema: PayloadFieldSchema,
        field_index: Vec<FieldIndex>,
    ) -> OperationResult<bool>;

    /// Creates an index for the payload field `key`, if it does not exist yet.
    ///
    /// The default implementation first calls `build_field_index`. When that yields nothing,
    /// `Ok(false)` is returned and nothing is applied. Otherwise the built schema and index
    /// are passed to `apply_field_index`, whose result is returned.
    fn create_field_index(
        &mut self,
        op_num: SeqNumberType,
        key: PayloadKeyTypeRef,
        field_schema: Option<&PayloadFieldSchema>,
    ) -> OperationResult<bool> {
        match self.build_field_index(op_num, key, field_schema)? {
            Some((built_schema, built_index)) => {
                self.apply_field_index(op_num, key.to_owned(), built_schema, built_index)
            }
            None => Ok(false),
        }
    }

    /// Returns the indexed fields together with their schema.
    fn get_indexed_fields(&self) -> HashMap<PayloadKeyType, PayloadFieldSchema>;

    /// Checks whether the segment errored during the last operations.
    fn check_error(&self) -> Option<SegmentFailedState>;

    /// Deletes the points matching `filter`.
    ///
    /// NOTE(review): the returned number is presumably the count of deleted points — confirm.
    fn delete_filtered<'a>(
        &'a mut self,
        op_num: SeqNumberType,
        filter: &'a Filter,
    ) -> OperationResult<usize>;

    /// Takes a snapshot of the segment.
    ///
    /// NOTE(review): the archive is presumably written through the `tar` builder in the
    /// requested `format` — confirm with implementors.
    /// `temp_path` is used to prepare the files to archive.
    /// The `snapshotted_segments` set is used to avoid writing the same snapshot twice.
    fn take_snapshot(
        &self,
        temp_path: &Path,
        tar: &tar_ext::BuilderExt,
        format: SnapshotFormat,
        snapshotted_segments: &mut HashSet<String>,
    ) -> OperationResult<()>;

    /// Returns the collected telemetry data of the segment.
    fn get_telemetry_data(&self, detail: TelemetryDetail) -> SegmentTelemetry;

    /// Gives the segment access to `query_context` for filling.
    ///
    /// NOTE(review): what exactly gets filled in is decided by the implementors — confirm.
    fn fill_query_context(&self, query_context: &mut QueryContext);
}