use std::collections::HashMap; use std::hash::{Hash, Hasher}; use common::types::ScoreType; use common::validation::validate_multi_vector; use schemars::JsonSchema; use segment::common::utils::MaybeOneOrMany; use segment::data_types::order_by::OrderBy; use segment::json_path::JsonPath; use segment::types::{ Filter, IntPayloadType, Payload, PointIdType, SearchParams, ShardKey, WithPayloadInterface, WithVector, }; use serde::{Deserialize, Serialize}; use serde_json::Value; use sparse::common::sparse_vector::SparseVector; use validator::{Validate, ValidationErrors}; /// Type for dense vector pub type DenseVector = Vec; /// Type for multi dense vector pub type MultiDenseVector = Vec; /// Vector Data /// Vectors can be described directly with values /// Or specified with source "objects" for inference #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] #[serde(untagged, rename_all = "snake_case")] pub enum Vector { Dense(DenseVector), Sparse(sparse::common::sparse_vector::SparseVector), MultiDense(MultiDenseVector), Document(Document), Image(Image), Object(InferenceObject), } /// Vector Data stored in Point #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] #[serde(untagged, rename_all = "snake_case")] pub enum VectorOutput { Dense(DenseVector), Sparse(sparse::common::sparse_vector::SparseVector), MultiDense(MultiDenseVector), } impl Validate for Vector { fn validate(&self) -> Result<(), validator::ValidationErrors> { match self { Vector::Dense(_) => Ok(()), Vector::Sparse(v) => v.validate(), Vector::MultiDense(m) => validate_multi_vector(m), Vector::Document(_) => Ok(()), Vector::Image(_) => Ok(()), Vector::Object(_) => Ok(()), } } } fn vector_example() -> DenseVector { vec![0.875, 0.140625, 0.8976] } fn multi_dense_vector_example() -> MultiDenseVector { vec![ vec![0.875, 0.140625, 0.1102], vec![0.758, 0.28126, 0.96871], vec![0.621, 0.421878, 0.9375], ] } fn named_vector_example() -> HashMap { let mut map = HashMap::new(); map.insert( "image-embeddings".to_string(), Vector::Dense(vec![0.873, 0.140625, 0.8976]), ); map } /// Full vector data per point separator with single and multiple vector modes #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] #[serde(untagged, rename_all = "snake_case")] pub enum VectorStruct { #[schemars(example = "vector_example")] Single(DenseVector), #[schemars(example = "multi_dense_vector_example")] MultiDense(MultiDenseVector), #[schemars(example = "named_vector_example")] Named(HashMap), Document(Document), Image(Image), Object(InferenceObject), } /// Vector data stored in Point #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] #[serde(untagged, rename_all = "snake_case")] pub enum VectorStructOutput { #[schemars(example = "vector_example")] Single(DenseVector), #[schemars(example = "multi_dense_vector_example")] MultiDense(MultiDenseVector), #[schemars(example = "named_vector_example")] Named(HashMap), } impl VectorStruct { /// Check if this vector struct is empty. pub fn is_empty(&self) -> bool { match self { VectorStruct::Single(vector) => vector.is_empty(), VectorStruct::MultiDense(vector) => vector.is_empty(), VectorStruct::Named(vectors) => vectors.values().all(|v| match v { Vector::Dense(vector) => vector.is_empty(), Vector::Sparse(vector) => vector.indices.is_empty(), Vector::MultiDense(vector) => vector.is_empty(), Vector::Document(_) => false, Vector::Image(_) => false, Vector::Object(_) => false, }), VectorStruct::Document(_) => false, VectorStruct::Image(_) => false, VectorStruct::Object(_) => false, } } } impl Validate for VectorStruct { fn validate(&self) -> Result<(), validator::ValidationErrors> { match self { VectorStruct::Single(_) => Ok(()), VectorStruct::MultiDense(v) => validate_multi_vector(v), VectorStruct::Named(v) => common::validation::validate_iter(v.values()), VectorStruct::Document(_) => Ok(()), VectorStruct::Image(_) => Ok(()), VectorStruct::Object(_) => Ok(()), } } } #[derive(Clone, Default, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct Options { /// Parameters for the model /// Values of the parameters are model-specific pub options: Option>, } impl Hash for Options { fn hash(&self, state: &mut H) { // Order of keys in the map should not affect the hash if let Some(options) = &self.options { let mut keys: Vec<_> = options.keys().collect(); keys.sort(); for key in keys { key.hash(state); options.get(key).unwrap().hash(state); } } } } /// WARN: Work-in-progress, unimplemented /// /// Text document for embedding. Requires inference infrastructure, unimplemented. #[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Hash, Validate)] pub struct Document { /// Text of the document /// This field will be used as input for the embedding model #[schemars(example = "document_text_example")] pub text: String, /// Name of the model used to generate the vector /// List of available models depends on a provider #[validate(length(min = 1))] #[schemars(length(min = 1), example = "model_example")] pub model: String, #[serde(flatten)] pub options: Options, } /// WARN: Work-in-progress, unimplemented /// /// Image object for embedding. Requires inference infrastructure, unimplemented. #[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Validate, Hash)] pub struct Image { /// Image data: base64 encoded image or an URL #[schemars(example = "image_value_example")] pub image: Value, /// Name of the model used to generate the vector /// List of available models depends on a provider #[validate(length(min = 1))] #[schemars(length(min = 1), example = "image_model_example")] pub model: String, /// Parameters for the model /// Values of the parameters are model-specific #[serde(flatten)] pub options: Options, } /// WARN: Work-in-progress, unimplemented /// /// Custom object for embedding. Requires inference infrastructure, unimplemented. #[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Hash, Validate)] pub struct InferenceObject { /// Arbitrary data, used as input for the embedding model /// Used if the model requires more than one input or a custom input pub object: Value, /// Name of the model used to generate the vector /// List of available models depends on a provider #[validate(length(min = 1))] #[schemars(length(min = 1), example = "model_example")] pub model: String, /// Parameters for the model /// Values of the parameters are model-specific #[serde(flatten)] pub options: Options, } #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] #[serde(untagged, rename_all = "snake_case")] pub enum BatchVectorStruct { Single(Vec), MultiDense(Vec), Named(HashMap>), Document(Vec), Image(Vec), Object(Vec), } #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct Batch { pub ids: Vec, pub vectors: BatchVectorStruct, pub payloads: Option>>, } #[derive(Debug, Deserialize, Serialize, Clone, JsonSchema, PartialEq)] #[serde(untagged)] pub enum ShardKeySelector { ShardKey(ShardKey), ShardKeys(Vec), // ToDo: select by pattern } fn version_example() -> segment::types::SeqNumberType { 3 } fn score_example() -> common::types::ScoreType { 0.75 } fn document_text_example() -> String { "This is a document text".to_string() } fn model_example() -> String { "jinaai/jina-embeddings-v2-base-en".to_string() } fn image_value_example() -> String { "https://example.com/image.jpg".to_string() } fn image_model_example() -> String { "Qdrant/clip-ViT-B-32-vision".to_string() } /// Search result #[derive(Serialize, JsonSchema, Clone, Debug)] pub struct ScoredPoint { /// Point id pub id: PointIdType, /// Point version #[schemars(example = "version_example")] pub version: segment::types::SeqNumberType, /// Points vector distance to the query vector #[schemars(example = "score_example")] pub score: ScoreType, /// Payload - values assigned to the point #[serde(skip_serializing_if = "Option::is_none")] pub payload: Option, /// Vector of the point #[serde(skip_serializing_if = "Option::is_none")] pub vector: Option, /// Shard Key #[serde(skip_serializing_if = "Option::is_none")] pub shard_key: Option, /// Order-by value #[serde(skip_serializing_if = "Option::is_none")] pub order_value: Option, } /// Point data #[derive(Clone, Debug, PartialEq, Serialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct Record { /// Id of the point pub id: segment::types::PointIdType, /// Payload - values assigned to the point #[serde(skip_serializing_if = "Option::is_none")] pub payload: Option, /// Vector of the point #[serde(skip_serializing_if = "Option::is_none")] pub vector: Option, /// Shard Key #[serde(skip_serializing_if = "Option::is_none")] pub shard_key: Option, #[serde(skip_serializing_if = "Option::is_none")] pub order_value: Option, } /// Vector data separator for named and unnamed modes /// Unnamed mode: /// /// { /// "vector": [1.0, 2.0, 3.0] /// } /// /// or named mode: /// /// { /// "vector": { /// "vector": [1.0, 2.0, 3.0], /// "name": "image-embeddings" /// } /// } #[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq)] #[serde(rename_all = "snake_case")] #[serde(untagged)] pub enum NamedVectorStruct { Default(segment::data_types::vectors::DenseVector), Dense(segment::data_types::vectors::NamedVector), Sparse(segment::data_types::vectors::NamedSparseVector), // No support for multi-dense vectors in search } #[derive(Deserialize, Serialize, JsonSchema, Clone, Debug, PartialEq)] #[serde(untagged)] pub enum OrderByInterface { Key(JsonPath), Struct(OrderBy), } /// Fusion algorithm allows to combine results of multiple prefetches. /// /// Available fusion algorithms: /// /// * `rrf` - Reciprocal Rank Fusion /// * `dbsf` - Distribution-Based Score Fusion #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub enum Fusion { Rrf, Dbsf, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(untagged)] pub enum VectorInput { DenseVector(DenseVector), SparseVector(SparseVector), MultiDenseVector(MultiDenseVector), Id(segment::types::PointIdType), Document(Document), Image(Image), Object(InferenceObject), } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct QueryRequestInternal { /// Sub-requests to perform first. If present, the query will be performed on the results of the prefetch(es). #[validate(nested)] #[serde(default, with = "MaybeOneOrMany")] #[schemars(with = "MaybeOneOrMany")] pub prefetch: Option>, /// Query to perform. If missing without prefetches, returns points ordered by their IDs. #[validate(nested)] pub query: Option, /// Define which vector name to use for querying. If missing, the default vector is used. pub using: Option, /// Filter conditions - return only those points that satisfy the specified conditions. #[validate(nested)] pub filter: Option, /// Search params for when there is no prefetch #[validate(nested)] pub params: Option, /// Return points with scores better than this threshold. pub score_threshold: Option, /// Max number of points to return. Default is 10. #[validate(range(min = 1))] pub limit: Option, /// Offset of the result. Skip this many points. Default is 0 pub offset: Option, /// Options for specifying which vectors to include into the response. Default is false. pub with_vector: Option, /// Options for specifying which payload to include or not. Default is false. pub with_payload: Option, /// The location to use for IDs lookup, if not specified - use the current collection and the 'using' vector /// Note: the other collection vectors should have the same vector size as the 'using' vector in the current collection #[serde(default)] pub lookup_from: Option, } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct QueryRequest { #[validate(nested)] #[serde(flatten)] pub internal: QueryRequestInternal, pub shard_key: Option, } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct QueryRequestBatch { #[validate(nested)] pub searches: Vec, } #[derive(Debug, Serialize, JsonSchema)] pub struct QueryResponse { pub points: Vec, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(untagged)] pub enum QueryInterface { Nearest(VectorInput), Query(Query), } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(untagged)] pub enum Query { /// Find the nearest neighbors to this vector. Nearest(NearestQuery), /// Use multiple positive and negative vectors to find the results. Recommend(RecommendQuery), /// Search for nearest points, but constrain the search space with context Discover(DiscoverQuery), /// Return points that live in positive areas. Context(ContextQuery), /// Order the points by a payload field. OrderBy(OrderByQuery), /// Fuse the results of multiple prefetches. Fusion(FusionQuery), /// Sample points from the collection, non-deterministically. Sample(SampleQuery), } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct NearestQuery { pub nearest: VectorInput, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct RecommendQuery { pub recommend: RecommendInput, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct DiscoverQuery { pub discover: DiscoverInput, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct ContextQuery { pub context: ContextInput, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct OrderByQuery { pub order_by: OrderByInterface, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct FusionQuery { pub fusion: Fusion, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub struct SampleQuery { pub sample: Sample, } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct Prefetch { /// Sub-requests to perform first. If present, the query will be performed on the results of the prefetches. #[validate(nested)] #[serde(default, with = "MaybeOneOrMany")] #[schemars(with = "MaybeOneOrMany")] pub prefetch: Option>, /// Query to perform. If missing without prefetches, returns points ordered by their IDs. #[validate(nested)] pub query: Option, /// Define which vector name to use for querying. If missing, the default vector is used. pub using: Option, /// Filter conditions - return only those points that satisfy the specified conditions. #[validate(nested)] pub filter: Option, /// Search params for when there is no prefetch #[validate(nested)] pub params: Option, /// Return points with scores better than this threshold. pub score_threshold: Option, /// Max number of points to return. Default is 10. #[validate(range(min = 1))] pub limit: Option, /// The location to use for IDs lookup, if not specified - use the current collection and the 'using' vector /// Note: the other collection vectors should have the same vector size as the 'using' vector in the current collection #[serde(default)] pub lookup_from: Option, } /// How to use positive and negative examples to find the results, default is `average_vector`: /// /// * `average_vector` - Average positive and negative vectors and create a single query /// with the formula `query = avg_pos + avg_pos - avg_neg`. Then performs normal search. /// /// * `best_score` - Uses custom search objective. Each candidate is compared against all /// examples, its score is then chosen from the `max(max_pos_score, max_neg_score)`. /// If the `max_neg_score` is chosen then it is squared and negated, otherwise it is just /// the `max_pos_score`. #[derive(Debug, Deserialize, Serialize, JsonSchema, Default, PartialEq, Clone, Copy)] #[serde(rename_all = "snake_case")] pub enum RecommendStrategy { #[default] AverageVector, BestScore, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct RecommendInput { /// Look for vectors closest to the vectors from these points pub positive: Option>, /// Try to avoid vectors like the vector from these points pub negative: Option>, /// How to use the provided vectors to find the results pub strategy: Option, } impl RecommendInput { pub fn iter(&self) -> impl Iterator { self.positive .iter() .flatten() .chain(self.negative.iter().flatten()) } } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct DiscoverInput { /// Use this as the primary search objective #[validate(nested)] pub target: VectorInput, /// Search space will be constrained by these pairs of vectors #[validate(nested)] #[serde(with = "MaybeOneOrMany")] #[schemars(with = "MaybeOneOrMany")] pub context: Option>, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct ContextInput( /// Search space will be constrained by these pairs of vectors #[serde(with = "MaybeOneOrMany")] #[schemars(with = "MaybeOneOrMany")] pub Option>, ); #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct ContextPair { /// A positive vector #[validate(nested)] pub positive: VectorInput, /// Repel from this vector #[validate(nested)] pub negative: VectorInput, } impl ContextPair { pub fn iter(&self) -> impl Iterator { std::iter::once(&self.positive).chain(std::iter::once(&self.negative)) } } #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub enum Sample { Random, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct WithLookup { /// Name of the collection to use for points lookup #[serde(rename = "collection")] pub collection_name: String, /// Options for specifying which payload to include (or not) #[serde(default = "default_with_payload")] pub with_payload: Option, /// Options for specifying which vectors to include (or not) #[serde(alias = "with_vector")] #[serde(default)] pub with_vectors: Option, } #[allow(clippy::unnecessary_wraps)] // Used as serde default const fn default_with_payload() -> Option { Some(WithPayloadInterface::Bool(true)) } #[derive(Serialize, Deserialize, JsonSchema, Debug, Clone, PartialEq)] #[serde(untagged)] pub enum WithLookupInterface { Collection(String), WithLookup(WithLookup), } /// Defines a location to use for looking up the vector. /// Specifies collection and vector field name. #[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq)] #[serde(rename_all = "snake_case")] pub struct LookupLocation { /// Name of the collection used for lookup pub collection: String, /// Optional name of the vector field within the collection. /// If not provided, the default vector field will be used. #[serde(default)] pub vector: Option, /// Specify in which shards to look for the points, if not specified - look in all shards #[serde(default, skip_serializing_if = "Option::is_none")] pub shard_key: Option, } #[derive(Validate, Serialize, Deserialize, JsonSchema, Debug, Clone, PartialEq)] pub struct BaseGroupRequest { /// Payload field to group by, must be a string or number field. /// If the field contains more than 1 value, all values will be used for grouping. /// One point can be in multiple groups. #[schemars(length(min = 1))] pub group_by: JsonPath, /// Maximum amount of points to return per group #[validate(range(min = 1))] pub group_size: u32, /// Maximum amount of groups to return #[validate(range(min = 1))] pub limit: u32, /// Look for points in another collection using the group ids pub with_lookup: Option, } #[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone)] pub struct SearchGroupsRequestInternal { /// Look for vectors closest to this #[validate(nested)] pub vector: NamedVectorStruct, /// Look only for points which satisfies this conditions #[validate(nested)] pub filter: Option, /// Additional search params #[validate(nested)] pub params: Option, /// Select which payload to return with the response. Default is false. pub with_payload: Option, /// Options for specifying which vectors to include into response. Default is false. #[serde(default, alias = "with_vectors")] pub with_vector: Option, /// Define a minimal score threshold for the result. /// If defined, less similar results will not be returned. /// Score of the returned result might be higher or smaller than the threshold depending on the /// Distance function used. E.g. for cosine similarity only higher scores will be returned. pub score_threshold: Option, #[serde(flatten)] #[validate(nested)] pub group_request: BaseGroupRequest, } /// Search request. /// Holds all conditions and parameters for the search of most similar points by vector similarity /// given the filtering restrictions. #[derive(Deserialize, Serialize, JsonSchema, Validate, Clone, Debug, PartialEq)] #[serde(rename_all = "snake_case")] pub struct SearchRequestInternal { /// Look for vectors closest to this #[validate(nested)] pub vector: NamedVectorStruct, /// Look only for points which satisfies this conditions #[validate(nested)] pub filter: Option, /// Additional search params #[validate(nested)] pub params: Option, /// Max number of result to return #[serde(alias = "top")] #[validate(range(min = 1))] pub limit: usize, /// Offset of the first result to return. /// May be used to paginate results. /// Note: large offset values may cause performance issues. pub offset: Option, /// Select which payload to return with the response. Default is false. pub with_payload: Option, /// Options for specifying which vectors to include into response. Default is false. #[serde(default, alias = "with_vectors")] pub with_vector: Option, /// Define a minimal score threshold for the result. /// If defined, less similar results will not be returned. /// Score of the returned result might be higher or smaller than the threshold depending on the /// Distance function used. E.g. for cosine similarity only higher scores will be returned. pub score_threshold: Option, } #[derive(Validate, Serialize, Deserialize, JsonSchema, Debug, Clone, PartialEq)] pub struct QueryBaseGroupRequest { /// Payload field to group by, must be a string or number field. /// If the field contains more than 1 value, all values will be used for grouping. /// One point can be in multiple groups. #[schemars(length(min = 1))] pub group_by: JsonPath, /// Maximum amount of points to return per group. Default is 3. #[validate(range(min = 1))] pub group_size: Option, /// Maximum amount of groups to return. Default is 10. #[validate(range(min = 1))] pub limit: Option, /// Look for points in another collection using the group ids pub with_lookup: Option, } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct QueryGroupsRequestInternal { /// Sub-requests to perform first. If present, the query will be performed on the results of the prefetch(es). #[validate(nested)] #[serde(default, with = "MaybeOneOrMany")] #[schemars(with = "MaybeOneOrMany")] pub prefetch: Option>, /// Query to perform. If missing without prefetches, returns points ordered by their IDs. #[validate(nested)] pub query: Option, /// Define which vector name to use for querying. If missing, the default vector is used. pub using: Option, /// Filter conditions - return only those points that satisfy the specified conditions. #[validate(nested)] pub filter: Option, /// Search params for when there is no prefetch #[validate(nested)] pub params: Option, /// Return points with scores better than this threshold. pub score_threshold: Option, /// Options for specifying which vectors to include into the response. Default is false. pub with_vector: Option, /// Options for specifying which payload to include or not. Default is false. pub with_payload: Option, /// The location to use for IDs lookup, if not specified - use the current collection and the 'using' vector /// Note: the other collection vectors should have the same vector size as the 'using' vector in the current collection #[serde(default)] pub lookup_from: Option, #[serde(flatten)] #[validate(nested)] pub group_request: QueryBaseGroupRequest, } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct QueryGroupsRequest { #[validate(nested)] #[serde(flatten)] pub search_group_request: QueryGroupsRequestInternal, pub shard_key: Option, } #[derive(Serialize, Deserialize, JsonSchema, Validate, Debug, PartialEq)] #[serde(rename_all = "snake_case")] pub struct SearchMatrixRequestInternal { /// Look only for points which satisfies this conditions #[validate(nested)] pub filter: Option, /// How many points to select and search within. Default is 10. #[validate(range(min = 2))] pub sample: Option, /// How many neighbours per sample to find. Default is 3. #[validate(range(min = 1))] pub limit: Option, /// Define which vector name to use for querying. If missing, the default vector is used. pub using: Option, } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] #[serde(rename_all = "snake_case")] pub struct SearchMatrixRequest { #[serde(flatten)] #[validate(nested)] pub search_request: SearchMatrixRequestInternal, /// Specify in which shards to look for the points, if not specified - look in all shards #[serde(default, skip_serializing_if = "Option::is_none")] pub shard_key: Option, } #[derive(Debug, Serialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] pub struct SearchMatrixOffsetsResponse { /// Row indices of the matrix pub offsets_row: Vec, /// Column indices of the matrix pub offsets_col: Vec, /// Scores associated with matrix coordinates pub scores: Vec, /// Ids of the points in order pub ids: Vec, } #[derive(Debug, Serialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] /// Pair of points (a, b) with score pub struct SearchMatrixPair { pub a: PointIdType, pub b: PointIdType, pub score: ScoreType, } impl SearchMatrixPair { pub fn new(a: impl Into, b: impl Into, score: ScoreType) -> Self { Self { a: a.into(), b: b.into(), score, } } } #[derive(Debug, Serialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] pub struct SearchMatrixPairsResponse { /// List of pairs of points with scores pub pairs: Vec, } #[derive(Debug, JsonSchema, Serialize, Deserialize, Validate)] pub struct FacetRequestInternal { /// Payload key to use for faceting. pub key: JsonPath, /// Max number of hits to return. Default is 10. #[validate(range(min = 1))] pub limit: Option, /// Filter conditions - only consider points that satisfy these conditions. pub filter: Option, /// Whether to do a more expensive exact count for each of the values in the facet. Default is false. pub exact: Option, } #[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] pub struct FacetRequest { #[validate(nested)] #[serde(flatten)] pub facet_request: FacetRequestInternal, pub shard_key: Option, } #[derive(Debug, Serialize, JsonSchema)] #[serde(untagged)] pub enum FacetValue { String(String), Integer(IntPayloadType), Bool(bool), } #[derive(Debug, Serialize, JsonSchema)] pub struct FacetValueHit { pub value: FacetValue, pub count: usize, } #[derive(Debug, Serialize, JsonSchema)] pub struct FacetResponse { pub hits: Vec, } #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, Validate)] #[serde(rename_all = "snake_case")] pub struct PointStruct { /// Point id pub id: PointIdType, /// Vectors #[serde(alias = "vectors")] #[validate(nested)] pub vector: VectorStruct, /// Payload values (optional) pub payload: Option, } #[derive(Debug, Deserialize, Serialize, Clone, Validate, JsonSchema)] pub struct PointsBatch { #[validate(nested)] pub batch: Batch, #[serde(default, skip_serializing_if = "Option::is_none")] pub shard_key: Option, } #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct PointVectors { /// Point id pub id: PointIdType, /// Vectors #[serde(alias = "vectors")] pub vector: VectorStruct, } #[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone)] pub struct UpdateVectors { /// Points with named vectors #[validate(nested)] #[validate(length(min = 1, message = "must specify points to update"))] pub points: Vec, #[serde(default, skip_serializing_if = "Option::is_none")] pub shard_key: Option, } #[derive(Debug, Deserialize, Serialize, Clone, JsonSchema, Validate)] pub struct PointsList { #[validate(nested)] pub points: Vec, #[serde(default, skip_serializing_if = "Option::is_none")] pub shard_key: Option, } impl<'de> serde::Deserialize<'de> for PointInsertOperations { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let value = serde_json::Value::deserialize(deserializer)?; match value { serde_json::Value::Object(map) => { if map.contains_key("batch") { PointsBatch::deserialize(serde_json::Value::Object(map)) .map(PointInsertOperations::PointsBatch) .map_err(serde::de::Error::custom) } else if map.contains_key("points") { PointsList::deserialize(serde_json::Value::Object(map)) .map(PointInsertOperations::PointsList) .map_err(serde::de::Error::custom) } else { Err(serde::de::Error::custom( "Invalid PointInsertOperations format", )) } } _ => Err(serde::de::Error::custom( "Invalid PointInsertOperations format", )), } } } #[derive(Debug, Serialize, Clone, JsonSchema)] #[serde(untagged)] pub enum PointInsertOperations { /// Inset points from a batch. PointsBatch(PointsBatch), /// Insert points from a list PointsList(PointsList), } impl Validate for PointInsertOperations { fn validate(&self) -> Result<(), ValidationErrors> { match self { PointInsertOperations::PointsBatch(batch) => batch.validate(), PointInsertOperations::PointsList(list) => list.validate(), } } }