Spaces:
Build error
Build error
File size: 7,911 Bytes
84d2a97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 |
pub mod anonymize;
pub mod error_logging;
pub mod macros;
pub mod mmap_bitslice_buffered_update_wrapper;
pub mod mmap_slice_buffered_update_wrapper;
pub mod operation_error;
pub mod operation_time_statistics;
pub mod reciprocal_rank_fusion;
pub mod rocksdb_buffered_delete_wrapper;
pub mod rocksdb_buffered_update_wrapper;
pub mod rocksdb_wrapper;
pub mod score_fusion;
pub mod utils;
pub mod validate_snapshot_archive;
pub mod vector_utils;
use std::sync::atomic::AtomicBool;
use crate::common::operation_error::{OperationError, OperationResult};
use crate::data_types::named_vectors::NamedVectors;
use crate::data_types::vectors::{QueryVector, VectorRef};
use crate::types::{SegmentConfig, SparseVectorDataConfig, VectorDataConfig};
/// Boxed, sendable, one-shot callback that reports its outcome as an `OperationResult<()>`.
///
/// The closure is `FnOnce`, so it can be invoked at most once, and it is `Send`,
/// so the box may be moved to another thread before being called.
// NOTE(review): presumably this carries deferred flush work, as the name suggests — confirm against callers.
pub type Flusher = Box<dyn FnOnce() -> OperationResult<()> + Send>;
/// Verify that `vector_name` is declared in `segment_config`.
///
/// The name is looked up among the dense vector configs first and, if it is not
/// found there, among the sparse vector configs.
///
/// # Errors
///
/// Returns the error produced by the sparse lookup
/// (`OperationError::VectorNameNotExists`) when neither lookup finds the name.
pub fn check_vector_name(vector_name: &str, segment_config: &SegmentConfig) -> OperationResult<()> {
    // TODO(sparse) it's a wrong error check. We use the fact,
    // that get_vector_config_or_error can return only one type of error - VectorNameNotExists
    match get_vector_config_or_error(vector_name, segment_config) {
        Ok(_) => Ok(()),
        Err(_) => get_sparse_vector_config_or_error(vector_name, segment_config).map(|_| ()),
    }
}
/// Check that the given vector name and query vector are compatible with the given
/// segment config.
///
/// The name is resolved against the dense vector configs first. When it is not a
/// dense vector name, it is resolved against the sparse vector configs instead, and
/// the query is validated with the sparse rules.
///
/// # Errors
///
/// * `OperationError::VectorNameNotExists` when the name is found in neither the
///   dense nor the sparse configs.
/// * Whatever `check_query_vector` / `check_query_sparse_vector` report when a vector
///   carried by the query does not match the resolved config.
pub fn check_vector(
    vector_name: &str,
    query_vector: &QueryVector,
    segment_config: &SegmentConfig,
) -> OperationResult<()> {
    // Bind the dense config directly instead of testing `is_ok()` and then applying `?`
    // to a value already known to be `Ok`.
    match get_vector_config_or_error(vector_name, segment_config) {
        Ok(vector_config) => check_query_vector(query_vector, vector_config),
        // Not a dense vector name: fall back to the sparse configs. The dense lookup
        // error is dropped on purpose; the sparse lookup reports its own.
        Err(_) => {
            let sparse_vector_config =
                get_sparse_vector_config_or_error(vector_name, segment_config)?;
            check_query_sparse_vector(query_vector, sparse_vector_config)
        }
    }
}
/// Validate every vector carried by `query_vector` against a dense vector config.
///
/// A `Nearest` query carries a single vector; for the other query kinds every vector
/// yielded by the query's `flat_iter()` is checked in turn.
///
/// # Errors
///
/// Returns the first error reported by `check_vector_against_config`; checking stops
/// at that vector.
fn check_query_vector(
    query_vector: &QueryVector,
    vector_config: &VectorDataConfig,
) -> OperationResult<()> {
    // Every arm already evaluates to an `OperationResult<()>`, so the match itself is
    // the return value.
    match query_vector {
        QueryVector::Nearest(nearest) => {
            check_vector_against_config(VectorRef::from(nearest), vector_config)
        }
        QueryVector::Recommend(reco) => reco
            .flat_iter()
            .try_for_each(|item| check_vector_against_config(VectorRef::from(item), vector_config)),
        QueryVector::Discovery(discovery) => discovery
            .flat_iter()
            .try_for_each(|item| check_vector_against_config(VectorRef::from(item), vector_config)),
        QueryVector::Context(context) => context
            .flat_iter()
            .try_for_each(|item| check_vector_against_config(VectorRef::from(item), vector_config)),
    }
}
/// Validate every vector carried by `query_vector` against a sparse vector config.
///
/// A `Nearest` query carries a single vector; for the other query kinds every vector
/// yielded by the query's `flat_iter()` is checked in turn.
///
/// # Errors
///
/// Returns the first error reported by `check_sparse_vector_against_config`; checking
/// stops at that vector.
fn check_query_sparse_vector(
    query_vector: &QueryVector,
    vector_config: &SparseVectorDataConfig,
) -> OperationResult<()> {
    // Every arm already evaluates to an `OperationResult<()>`, so the match itself is
    // the return value.
    match query_vector {
        QueryVector::Nearest(nearest) => {
            check_sparse_vector_against_config(VectorRef::from(nearest), vector_config)
        }
        QueryVector::Recommend(reco) => reco.flat_iter().try_for_each(|item| {
            check_sparse_vector_against_config(VectorRef::from(item), vector_config)
        }),
        QueryVector::Discovery(discovery) => discovery.flat_iter().try_for_each(|item| {
            check_sparse_vector_against_config(VectorRef::from(item), vector_config)
        }),
        QueryVector::Context(context) => context.flat_iter().try_for_each(|item| {
            check_sparse_vector_against_config(VectorRef::from(item), vector_config)
        }),
    }
}
/// Validate a batch of query vectors that all target the vector named `vector_name`.
///
/// The name is resolved once: against the dense vector configs first and, failing
/// that, against the sparse vector configs. Every query is then checked with the
/// rules matching the resolved config.
///
/// # Errors
///
/// Returns `OperationError::VectorNameNotExists` when the name is found in neither
/// config map, or the first error reported while checking a query.
pub fn check_query_vectors(
    vector_name: &str,
    query_vectors: &[&QueryVector],
    segment_config: &SegmentConfig,
) -> OperationResult<()> {
    match get_vector_config_or_error(vector_name, segment_config) {
        Ok(dense_config) => {
            for query in query_vectors {
                check_query_vector(query, dense_config)?;
            }
        }
        // Not a dense vector name: the sparse lookup decides whether the name exists.
        Err(_) => {
            let sparse_config = get_sparse_vector_config_or_error(vector_name, segment_config)?;
            for query in query_vectors {
                check_query_sparse_vector(query, sparse_config)?;
            }
        }
    }
    Ok(())
}
/// Validate each named vector in `vectors` against `segment_config`.
///
/// Every `(name, vector)` pair is converted into a query vector and passed through
/// `check_vector`.
///
/// # Errors
///
/// Returns the first error reported by `check_vector`; the remaining pairs are not
/// inspected.
pub fn check_named_vectors(
    vectors: &NamedVectors,
    segment_config: &SegmentConfig,
) -> OperationResult<()> {
    vectors
        .iter()
        .try_for_each(|(name, data)| check_vector(name, &data.into(), segment_config))
}
/// Look up the dense vector config registered under `vector_name`.
///
/// # Errors
///
/// Returns `OperationError::VectorNameNotExists`, carrying the requested name, when
/// `segment_config.vector_data` has no entry for it.
fn get_vector_config_or_error<'a>(
    vector_name: &str,
    segment_config: &'a SegmentConfig,
) -> OperationResult<&'a VectorDataConfig> {
    match segment_config.vector_data.get(vector_name) {
        Some(config) => Ok(config),
        None => Err(OperationError::VectorNameNotExists {
            received_name: vector_name.into(),
        }),
    }
}
/// Look up the sparse vector config registered under `vector_name`.
///
/// # Errors
///
/// Returns `OperationError::VectorNameNotExists`, carrying the requested name, when
/// `segment_config.sparse_vector_data` has no entry for it.
fn get_sparse_vector_config_or_error<'a>(
    vector_name: &str,
    segment_config: &'a SegmentConfig,
) -> OperationResult<&'a SparseVectorDataConfig> {
    match segment_config.sparse_vector_data.get(vector_name) {
        Some(config) => Ok(config),
        None => Err(OperationError::VectorNameNotExists {
            received_name: vector_name.into(),
        }),
    }
}
/// Check that `vector` is acceptable for a dense vector configuration.
///
/// * A dense vector must have exactly `vector_config.size` elements.
/// * A multi-dense vector must have exactly `vector_config.size` elements in each of
///   its inner vectors; the first mismatch is reported.
/// * A sparse vector is always rejected.
///
/// # Errors
///
/// * `OperationError::WrongVectorDimension` on a length mismatch.
/// * `OperationError::WrongSparse` when a sparse vector is supplied.
fn check_vector_against_config(
    vector: VectorRef,
    vector_config: &VectorDataConfig,
) -> OperationResult<()> {
    let expected_dim = vector_config.size;
    // Single place for the dimensionality rule shared by the dense and multi-dense arms.
    let ensure_dim = |received_dim: usize| {
        if received_dim == expected_dim {
            Ok(())
        } else {
            Err(OperationError::WrongVectorDimension {
                expected_dim,
                received_dim,
            })
        }
    };

    match vector {
        VectorRef::Dense(dense) => ensure_dim(dense.len()),
        VectorRef::Sparse(_) => Err(OperationError::WrongSparse),
        VectorRef::MultiDense(multi) => {
            for inner in multi.multi_vectors() {
                ensure_dim(inner.len())?;
            }
            Ok(())
        }
    }
}
/// Check that `vector` is acceptable for a sparse vector configuration.
///
/// Only the kind of vector is inspected; the configuration itself is currently unused.
///
/// # Errors
///
/// * `OperationError::WrongSparse` when a dense vector is supplied.
/// * `OperationError::WrongMulti` when a multi-dense vector is supplied.
fn check_sparse_vector_against_config(
    vector: VectorRef,
    _vector_config: &SparseVectorDataConfig,
) -> OperationResult<()> {
    match vector {
        // TODO(sparse) check vector by config
        VectorRef::Sparse(_) => Ok(()),
        VectorRef::Dense(_) => Err(OperationError::WrongSparse),
        VectorRef::MultiDense(_) => Err(OperationError::WrongMulti),
    }
}
/// Report a cancellation error if the shared stop flag has been raised.
///
/// The flag is read with `Ordering::Relaxed`.
///
/// # Errors
///
/// Returns `OperationError::Cancelled` when `is_stopped` reads `true`.
pub fn check_stopped(is_stopped: &AtomicBool) -> OperationResult<()> {
    let stop_requested = is_stopped.load(std::sync::atomic::Ordering::Relaxed);
    if !stop_requested {
        return Ok(());
    }
    Err(OperationError::Cancelled {
        description: String::from("Operation is stopped externally"),
    })
}
/// Number of bytes in one kilobyte, using the binary convention (1 KB = 1024 bytes).
pub const BYTES_IN_KB: usize = 1024;
|