Gouzi Mohaled
Ajout du dossier lib
84d2a97
use itertools::Itertools;
use segment::common::operation_error::OperationError;
use segment::data_types::vectors::{
DenseVector, MultiDenseVectorInternal, NamedVectorStruct, VectorInternal, VectorStructInternal,
};
use sparse::common::sparse_vector::SparseVector;
use tonic::Status;
use crate::grpc::qdrant as grpc;
use crate::rest::schema as rest;
fn convert_to_plain_multi_vector(
data: Vec<f32>,
vectors_count: usize,
) -> Result<rest::MultiDenseVector, OperationError> {
let dim = data.len() / vectors_count;
if dim * vectors_count != data.len() {
return Err(OperationError::ValidationError {
description: format!(
"Data length is not divisible by vectors count. Data length: {}, vectors count: {}",
data.len(),
vectors_count
),
});
}
Ok(data
.into_iter()
.chunks(dim)
.into_iter()
.map(Iterator::collect)
.collect())
}
impl TryFrom<rest::VectorOutput> for grpc::VectorOutput {
type Error = OperationError;
fn try_from(value: rest::VectorOutput) -> Result<Self, Self::Error> {
let vector = match value {
rest::VectorOutput::Dense(dense) => {
let internal_vector = VectorInternal::from(dense);
grpc::VectorOutput::from(internal_vector)
}
rest::VectorOutput::Sparse(sparse) => {
let internal_vector = VectorInternal::from(sparse);
grpc::VectorOutput::from(internal_vector)
}
rest::VectorOutput::MultiDense(multi) => {
let internal_vector = VectorInternal::try_from(multi)?;
grpc::VectorOutput::from(internal_vector)
}
};
Ok(vector)
}
}
impl TryFrom<rest::VectorStructOutput> for grpc::VectorsOutput {
type Error = OperationError;
fn try_from(
vector_struct: crate::rest::schema::VectorStructOutput,
) -> Result<Self, Self::Error> {
let vectors = match vector_struct {
crate::rest::schema::VectorStructOutput::Single(dense) => {
let vector = VectorInternal::from(dense);
Self {
vectors_options: Some(grpc::vectors_output::VectorsOptions::Vector(
grpc::VectorOutput::from(vector),
)),
}
}
crate::rest::schema::VectorStructOutput::MultiDense(vector) => {
let vector = VectorInternal::try_from(vector)?;
Self {
vectors_options: Some(grpc::vectors_output::VectorsOptions::Vector(
grpc::VectorOutput::from(vector),
)),
}
}
crate::rest::schema::VectorStructOutput::Named(vectors) => {
let vectors: Result<_, _> = vectors
.into_iter()
.map(|(name, vector)| grpc::VectorOutput::try_from(vector).map(|v| (name, v)))
.collect();
Self {
vectors_options: Some(grpc::vectors_output::VectorsOptions::Vectors(
grpc::NamedVectorsOutput { vectors: vectors? },
)),
}
}
};
Ok(vectors)
}
}
impl From<VectorInternal> for grpc::VectorOutput {
fn from(vector: VectorInternal) -> Self {
match vector {
VectorInternal::Dense(vector) => Self {
data: vector,
indices: None,
vectors_count: None,
vector: None,
},
VectorInternal::Sparse(vector) => Self {
data: vector.values,
indices: Some(grpc::SparseIndices {
data: vector.indices,
}),
vectors_count: None,
vector: None,
},
VectorInternal::MultiDense(vector) => {
let vector_count = vector.multi_vectors().count() as u32;
Self {
data: vector.flattened_vectors,
indices: None,
vectors_count: Some(vector_count),
vector: None,
}
}
}
}
}
impl From<VectorStructInternal> for grpc::VectorsOutput {
fn from(vector_struct: VectorStructInternal) -> Self {
match vector_struct {
VectorStructInternal::Single(vector) => {
let vector = VectorInternal::from(vector);
Self {
vectors_options: Some(grpc::vectors_output::VectorsOptions::Vector(
grpc::VectorOutput::from(vector),
)),
}
}
VectorStructInternal::MultiDense(vector) => {
let vector = VectorInternal::from(vector);
Self {
vectors_options: Some(grpc::vectors_output::VectorsOptions::Vector(
grpc::VectorOutput::from(vector),
)),
}
}
VectorStructInternal::Named(vectors) => Self {
vectors_options: Some(grpc::vectors_output::VectorsOptions::Vectors(
grpc::NamedVectorsOutput {
vectors: vectors
.into_iter()
.map(|(name, vector)| (name, grpc::VectorOutput::from(vector)))
.collect(),
},
)),
},
}
}
}
impl TryFrom<grpc::Vectors> for rest::VectorStruct {
type Error = Status;
fn try_from(vectors: grpc::Vectors) -> Result<Self, Self::Error> {
match vectors.vectors_options {
Some(vectors_options) => Ok(match vectors_options {
grpc::vectors::VectorsOptions::Vector(vector) => {
let grpc::Vector {
data,
indices,
vectors_count,
vector,
} = vector;
if let Some(vector) = vector {
return match vector {
grpc::vector::Vector::Dense(dense) => {
Ok(rest::VectorStruct::Single(dense.data))
}
grpc::vector::Vector::Sparse(_sparse) => {
return Err(Status::invalid_argument(
"Sparse vector must be named".to_string(),
));
}
grpc::vector::Vector::MultiDense(multi) => {
Ok(rest::VectorStruct::MultiDense(
multi.vectors.into_iter().map(|v| v.data).collect(),
))
}
grpc::vector::Vector::Document(document) => Ok(
rest::VectorStruct::Document(rest::Document::try_from(document)?),
),
grpc::vector::Vector::Image(image) => {
Ok(rest::VectorStruct::Image(rest::Image::try_from(image)?))
}
grpc::vector::Vector::Object(object) => Ok(rest::VectorStruct::Object(
rest::InferenceObject::try_from(object)?,
)),
};
}
if indices.is_some() {
return Err(Status::invalid_argument(
"Sparse vector must be named".to_string(),
));
}
if let Some(vectors_count) = vectors_count {
let multi = convert_to_plain_multi_vector(data, vectors_count as usize)
.map_err(|err| {
Status::invalid_argument(format!(
"Unable to convert to multi-dense vector: {err}"
))
})?;
rest::VectorStruct::MultiDense(multi)
} else {
rest::VectorStruct::Single(data)
}
}
grpc::vectors::VectorsOptions::Vectors(vectors) => {
let named_vectors: Result<_, _> = vectors
.vectors
.into_iter()
.map(|(k, v)| rest::Vector::try_from(v).map(|res| (k, res)))
.collect();
rest::VectorStruct::Named(named_vectors?)
}
}),
None => Err(Status::invalid_argument("No Vector Provided")),
}
}
}
impl TryFrom<grpc::Vector> for rest::Vector {
type Error = Status;
fn try_from(vector: grpc::Vector) -> Result<Self, Self::Error> {
let grpc::Vector {
data,
indices,
vectors_count,
vector,
} = vector;
if let Some(vector) = vector {
return match vector {
grpc::vector::Vector::Dense(dense) => Ok(rest::Vector::Dense(dense.data)),
grpc::vector::Vector::Sparse(sparse) => Ok(rest::Vector::Sparse(
sparse::common::sparse_vector::SparseVector::from(sparse),
)),
grpc::vector::Vector::MultiDense(multi) => Ok(rest::Vector::MultiDense(
multi.vectors.into_iter().map(|v| v.data).collect(),
)),
grpc::vector::Vector::Document(document) => {
Ok(rest::Vector::Document(rest::Document::try_from(document)?))
}
grpc::vector::Vector::Image(image) => {
Ok(rest::Vector::Image(rest::Image::try_from(image)?))
}
grpc::vector::Vector::Object(object) => Ok(rest::Vector::Object(
rest::InferenceObject::try_from(object)?,
)),
};
}
if let Some(indices) = indices {
return Ok(rest::Vector::Sparse(
sparse::common::sparse_vector::SparseVector {
values: data,
indices: indices.data,
},
));
}
if let Some(vectors_count) = vectors_count {
let multi =
convert_to_plain_multi_vector(data, vectors_count as usize).map_err(|err| {
Status::invalid_argument(format!(
"Unable to convert to multi-dense vector: {err}"
))
})?;
Ok(rest::Vector::MultiDense(multi))
} else {
Ok(rest::Vector::Dense(data))
}
}
}
impl grpc::MultiDenseVector {
pub fn into_matrix(self) -> Vec<Vec<f32>> {
self.vectors.into_iter().map(|v| v.data).collect()
}
}
impl TryFrom<grpc::VectorOutput> for VectorInternal {
type Error = OperationError;
fn try_from(vector: grpc::VectorOutput) -> Result<Self, Self::Error> {
let grpc::VectorOutput {
data,
indices,
vectors_count,
vector,
} = vector;
if let Some(vector) = vector {
return match vector {
grpc::vector_output::Vector::Dense(dense) => Ok(VectorInternal::Dense(dense.data)),
grpc::vector_output::Vector::Sparse(sparse) => Ok(VectorInternal::Sparse(
sparse::common::sparse_vector::SparseVector::from(sparse),
)),
grpc::vector_output::Vector::MultiDense(multi) => Ok(VectorInternal::MultiDense(
MultiDenseVectorInternal::try_from_matrix(multi.into_matrix())?,
)),
};
}
if let Some(indices) = indices {
return Ok(VectorInternal::Sparse(
sparse::common::sparse_vector::SparseVector {
values: data,
indices: indices.data,
},
));
}
if let Some(vectors_count) = vectors_count {
let dim = data.len() / vectors_count as usize;
let multi = MultiDenseVectorInternal::try_from_flatten(data, dim)?;
Ok(VectorInternal::MultiDense(multi))
} else {
Ok(VectorInternal::Dense(data))
}
}
}
impl TryFrom<grpc::VectorsOutput> for VectorStructInternal {
type Error = OperationError;
fn try_from(vectors_output: grpc::VectorsOutput) -> Result<Self, Self::Error> {
match vectors_output.vectors_options {
Some(vectors_options) => Ok(match vectors_options {
grpc::vectors_output::VectorsOptions::Vector(vector) => {
let grpc::VectorOutput {
data,
indices,
vectors_count,
vector,
} = vector;
if let Some(vector) = vector {
return match vector {
grpc::vector_output::Vector::Dense(dense) => {
Ok(VectorStructInternal::Single(dense.data))
}
grpc::vector_output::Vector::Sparse(_sparse) => {
return Err(OperationError::ValidationError {
description: "Sparse vector must be named".to_string(),
});
}
grpc::vector_output::Vector::MultiDense(multi) => {
Ok(VectorStructInternal::MultiDense(
MultiDenseVectorInternal::try_from_matrix(multi.into_matrix())?,
))
}
};
}
if indices.is_some() {
return Err(OperationError::ValidationError {
description: "Sparse vector must be named".to_string(),
});
}
if let Some(vectors_count) = vectors_count {
let dim = data.len() / vectors_count as usize;
let multi = MultiDenseVectorInternal::try_from_flatten(data, dim)?;
VectorStructInternal::MultiDense(multi)
} else {
VectorStructInternal::Single(data)
}
}
grpc::vectors_output::VectorsOptions::Vectors(vectors) => {
let named_vectors: Result<_, _> = vectors
.vectors
.into_iter()
.map(|(k, v)| VectorInternal::try_from(v).map(|res| (k, res)))
.collect();
VectorStructInternal::Named(named_vectors?)
}
}),
None => Err(OperationError::ValidationError {
description: "No Vector Provided".to_string(),
}),
}
}
}
impl From<VectorInternal> for grpc::Vector {
fn from(vector: VectorInternal) -> Self {
match vector {
VectorInternal::Dense(vector) => Self {
data: vector,
indices: None,
vectors_count: None,
vector: None,
},
VectorInternal::Sparse(vector) => Self {
data: vector.values,
indices: Some(grpc::SparseIndices {
data: vector.indices,
}),
vectors_count: None,
vector: None,
},
VectorInternal::MultiDense(vector) => {
let vector_count = vector.multi_vectors().count() as u32;
Self {
data: vector.flattened_vectors,
indices: None,
vectors_count: Some(vector_count),
vector: None,
}
}
}
}
}
impl From<VectorStructInternal> for grpc::Vectors {
fn from(vector_struct: VectorStructInternal) -> Self {
match vector_struct {
VectorStructInternal::Single(vector) => {
let vector = VectorInternal::from(vector);
Self {
vectors_options: Some(grpc::vectors::VectorsOptions::Vector(
grpc::Vector::from(vector),
)),
}
}
VectorStructInternal::MultiDense(vector) => {
let vector = VectorInternal::from(vector);
Self {
vectors_options: Some(grpc::vectors::VectorsOptions::Vector(
grpc::Vector::from(vector),
)),
}
}
VectorStructInternal::Named(vectors) => Self {
vectors_options: Some(grpc::vectors::VectorsOptions::Vectors(grpc::NamedVectors {
vectors: vectors
.into_iter()
.map(|(name, vector)| (name, grpc::Vector::from(vector)))
.collect(),
})),
},
}
}
}
impl TryFrom<grpc::Vector> for VectorInternal {
type Error = Status;
fn try_from(vector: grpc::Vector) -> Result<Self, Self::Error> {
// sparse vector
if let Some(indices) = vector.indices {
return Ok(VectorInternal::Sparse(
sparse::common::sparse_vector::SparseVector::new(indices.data, vector.data)
.map_err(|e| {
Status::invalid_argument(format!(
"Sparse indices does not match sparse vector conditions: {e}"
))
})?,
));
}
// multi vector
if let Some(vector_count) = vector.vectors_count {
if vector_count == 0 {
return Err(Status::invalid_argument(
"Vector count should be greater than 0",
));
}
let dim = vector.data.len() / vector_count as usize;
let multi = MultiDenseVectorInternal::new(vector.data, dim);
return Ok(VectorInternal::MultiDense(multi));
}
// dense vector
Ok(VectorInternal::Dense(vector.data))
}
}
impl From<grpc::DenseVector> for DenseVector {
fn from(value: grpc::DenseVector) -> Self {
value.data
}
}
impl From<DenseVector> for grpc::DenseVector {
fn from(value: DenseVector) -> Self {
Self { data: value }
}
}
impl From<SparseVector> for grpc::SparseVector {
fn from(value: SparseVector) -> Self {
let SparseVector { indices, values } = value;
Self { values, indices }
}
}
impl From<grpc::SparseVector> for SparseVector {
fn from(value: grpc::SparseVector) -> Self {
let grpc::SparseVector { indices, values } = value;
Self { indices, values }
}
}
impl From<MultiDenseVectorInternal> for grpc::MultiDenseVector {
fn from(value: MultiDenseVectorInternal) -> Self {
let vectors = value
.flattened_vectors
.into_iter()
.chunks(value.dim)
.into_iter()
.map(Iterator::collect::<Vec<_>>)
.map(grpc::DenseVector::from)
.collect();
Self { vectors }
}
}
impl From<grpc::MultiDenseVector> for MultiDenseVectorInternal {
/// Uses the equivalent of [new_unchecked()](segment_vectors::MultiDenseVectorInternal::new_unchecked), but rewritten to avoid collecting twice
fn from(value: grpc::MultiDenseVector) -> Self {
let dim = value.vectors[0].data.len();
let inner_vector = value
.vectors
.into_iter()
.flat_map(DenseVector::from)
.collect();
Self {
flattened_vectors: inner_vector,
dim,
}
}
}
impl From<VectorInternal> for grpc::RawVector {
fn from(value: VectorInternal) -> Self {
use crate::grpc::qdrant::raw_vector::Variant;
let variant = match value {
VectorInternal::Dense(vector) => Variant::Dense(grpc::DenseVector::from(vector)),
VectorInternal::Sparse(vector) => Variant::Sparse(grpc::SparseVector::from(vector)),
VectorInternal::MultiDense(vector) => {
Variant::MultiDense(grpc::MultiDenseVector::from(vector))
}
};
Self {
variant: Some(variant),
}
}
}
impl TryFrom<grpc::RawVector> for VectorInternal {
type Error = Status;
fn try_from(value: grpc::RawVector) -> Result<Self, Self::Error> {
use crate::grpc::qdrant::raw_vector::Variant;
let variant = value
.variant
.ok_or_else(|| Status::invalid_argument("No vector variant provided"))?;
let vector = match variant {
Variant::Dense(dense) => VectorInternal::Dense(DenseVector::from(dense)),
Variant::Sparse(sparse) => {
VectorInternal::Sparse(sparse::common::sparse_vector::SparseVector::from(sparse))
}
Variant::MultiDense(multi_dense) => {
VectorInternal::MultiDense(MultiDenseVectorInternal::from(multi_dense))
}
};
Ok(vector)
}
}
impl From<NamedVectorStruct> for grpc::RawVector {
fn from(value: NamedVectorStruct) -> Self {
Self::from(value.to_vector())
}
}