Gouzi Mohaled
Ajout du dossier lib
84d2a97
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use common::types::ScoreType;
use common::validation::validate_multi_vector;
use schemars::JsonSchema;
use segment::common::utils::MaybeOneOrMany;
use segment::data_types::order_by::OrderBy;
use segment::json_path::JsonPath;
use segment::types::{
Filter, IntPayloadType, Payload, PointIdType, SearchParams, ShardKey, WithPayloadInterface,
WithVector,
};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use sparse::common::sparse_vector::SparseVector;
use validator::{Validate, ValidationErrors};
/// Type for dense vector
pub type DenseVector = Vec<segment::data_types::vectors::VectorElementType>;
/// Type for multi dense vector
pub type MultiDenseVector = Vec<DenseVector>;
/// Vector Data
/// Vectors can be described directly with values
/// Or specified with source "objects" for inference
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)]
#[serde(untagged, rename_all = "snake_case")]
pub enum Vector {
Dense(DenseVector),
Sparse(sparse::common::sparse_vector::SparseVector),
MultiDense(MultiDenseVector),
Document(Document),
Image(Image),
Object(InferenceObject),
}
/// Vector Data stored in Point
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)]
#[serde(untagged, rename_all = "snake_case")]
pub enum VectorOutput {
Dense(DenseVector),
Sparse(sparse::common::sparse_vector::SparseVector),
MultiDense(MultiDenseVector),
}
impl Validate for Vector {
fn validate(&self) -> Result<(), validator::ValidationErrors> {
match self {
Vector::Dense(_) => Ok(()),
Vector::Sparse(v) => v.validate(),
Vector::MultiDense(m) => validate_multi_vector(m),
Vector::Document(_) => Ok(()),
Vector::Image(_) => Ok(()),
Vector::Object(_) => Ok(()),
}
}
}
fn vector_example() -> DenseVector {
vec![0.875, 0.140625, 0.8976]
}
fn multi_dense_vector_example() -> MultiDenseVector {
vec![
vec![0.875, 0.140625, 0.1102],
vec![0.758, 0.28126, 0.96871],
vec![0.621, 0.421878, 0.9375],
]
}
fn named_vector_example() -> HashMap<String, Vector> {
let mut map = HashMap::new();
map.insert(
"image-embeddings".to_string(),
Vector::Dense(vec![0.873, 0.140625, 0.8976]),
);
map
}
/// Full vector data per point separator with single and multiple vector modes
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)]
#[serde(untagged, rename_all = "snake_case")]
pub enum VectorStruct {
#[schemars(example = "vector_example")]
Single(DenseVector),
#[schemars(example = "multi_dense_vector_example")]
MultiDense(MultiDenseVector),
#[schemars(example = "named_vector_example")]
Named(HashMap<String, Vector>),
Document(Document),
Image(Image),
Object(InferenceObject),
}
/// Vector data stored in Point
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)]
#[serde(untagged, rename_all = "snake_case")]
pub enum VectorStructOutput {
#[schemars(example = "vector_example")]
Single(DenseVector),
#[schemars(example = "multi_dense_vector_example")]
MultiDense(MultiDenseVector),
#[schemars(example = "named_vector_example")]
Named(HashMap<String, VectorOutput>),
}
impl VectorStruct {
/// Check if this vector struct is empty.
pub fn is_empty(&self) -> bool {
match self {
VectorStruct::Single(vector) => vector.is_empty(),
VectorStruct::MultiDense(vector) => vector.is_empty(),
VectorStruct::Named(vectors) => vectors.values().all(|v| match v {
Vector::Dense(vector) => vector.is_empty(),
Vector::Sparse(vector) => vector.indices.is_empty(),
Vector::MultiDense(vector) => vector.is_empty(),
Vector::Document(_) => false,
Vector::Image(_) => false,
Vector::Object(_) => false,
}),
VectorStruct::Document(_) => false,
VectorStruct::Image(_) => false,
VectorStruct::Object(_) => false,
}
}
}
impl Validate for VectorStruct {
fn validate(&self) -> Result<(), validator::ValidationErrors> {
match self {
VectorStruct::Single(_) => Ok(()),
VectorStruct::MultiDense(v) => validate_multi_vector(v),
VectorStruct::Named(v) => common::validation::validate_iter(v.values()),
VectorStruct::Document(_) => Ok(()),
VectorStruct::Image(_) => Ok(()),
VectorStruct::Object(_) => Ok(()),
}
}
}
#[derive(Clone, Default, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema)]
pub struct Options {
/// Parameters for the model
/// Values of the parameters are model-specific
pub options: Option<HashMap<String, Value>>,
}
impl Hash for Options {
fn hash<H: Hasher>(&self, state: &mut H) {
// Order of keys in the map should not affect the hash
if let Some(options) = &self.options {
let mut keys: Vec<_> = options.keys().collect();
keys.sort();
for key in keys {
key.hash(state);
options.get(key).unwrap().hash(state);
}
}
}
}
/// WARN: Work-in-progress, unimplemented
///
/// Text document for embedding. Requires inference infrastructure, unimplemented.
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Hash, Validate)]
pub struct Document {
/// Text of the document
/// This field will be used as input for the embedding model
#[schemars(example = "document_text_example")]
pub text: String,
/// Name of the model used to generate the vector
/// List of available models depends on a provider
#[validate(length(min = 1))]
#[schemars(length(min = 1), example = "model_example")]
pub model: String,
#[serde(flatten)]
pub options: Options,
}
/// WARN: Work-in-progress, unimplemented
///
/// Image object for embedding. Requires inference infrastructure, unimplemented.
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Validate, Hash)]
pub struct Image {
/// Image data: base64 encoded image or an URL
#[schemars(example = "image_value_example")]
pub image: Value,
/// Name of the model used to generate the vector
/// List of available models depends on a provider
#[validate(length(min = 1))]
#[schemars(length(min = 1), example = "image_model_example")]
pub model: String,
/// Parameters for the model
/// Values of the parameters are model-specific
#[serde(flatten)]
pub options: Options,
}
/// WARN: Work-in-progress, unimplemented
///
/// Custom object for embedding. Requires inference infrastructure, unimplemented.
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Hash, Validate)]
pub struct InferenceObject {
/// Arbitrary data, used as input for the embedding model
/// Used if the model requires more than one input or a custom input
pub object: Value,
/// Name of the model used to generate the vector
/// List of available models depends on a provider
#[validate(length(min = 1))]
#[schemars(length(min = 1), example = "model_example")]
pub model: String,
/// Parameters for the model
/// Values of the parameters are model-specific
#[serde(flatten)]
pub options: Options,
}
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)]
#[serde(untagged, rename_all = "snake_case")]
pub enum BatchVectorStruct {
Single(Vec<DenseVector>),
MultiDense(Vec<MultiDenseVector>),
Named(HashMap<String, Vec<Vector>>),
Document(Vec<Document>),
Image(Vec<Image>),
Object(Vec<InferenceObject>),
}
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct Batch {
pub ids: Vec<PointIdType>,
pub vectors: BatchVectorStruct,
pub payloads: Option<Vec<Option<Payload>>>,
}
#[derive(Debug, Deserialize, Serialize, Clone, JsonSchema, PartialEq)]
#[serde(untagged)]
pub enum ShardKeySelector {
ShardKey(ShardKey),
ShardKeys(Vec<ShardKey>),
// ToDo: select by pattern
}
fn version_example() -> segment::types::SeqNumberType {
3
}
fn score_example() -> common::types::ScoreType {
0.75
}
fn document_text_example() -> String {
"This is a document text".to_string()
}
fn model_example() -> String {
"jinaai/jina-embeddings-v2-base-en".to_string()
}
fn image_value_example() -> String {
"https://example.com/image.jpg".to_string()
}
fn image_model_example() -> String {
"Qdrant/clip-ViT-B-32-vision".to_string()
}
/// Search result
#[derive(Serialize, JsonSchema, Clone, Debug)]
pub struct ScoredPoint {
/// Point id
pub id: PointIdType,
/// Point version
#[schemars(example = "version_example")]
pub version: segment::types::SeqNumberType,
/// Points vector distance to the query vector
#[schemars(example = "score_example")]
pub score: ScoreType,
/// Payload - values assigned to the point
#[serde(skip_serializing_if = "Option::is_none")]
pub payload: Option<segment::types::Payload>,
/// Vector of the point
#[serde(skip_serializing_if = "Option::is_none")]
pub vector: Option<VectorStructOutput>,
/// Shard Key
#[serde(skip_serializing_if = "Option::is_none")]
pub shard_key: Option<ShardKey>,
/// Order-by value
#[serde(skip_serializing_if = "Option::is_none")]
pub order_value: Option<segment::data_types::order_by::OrderValue>,
}
/// Point data
#[derive(Clone, Debug, PartialEq, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct Record {
/// Id of the point
pub id: segment::types::PointIdType,
/// Payload - values assigned to the point
#[serde(skip_serializing_if = "Option::is_none")]
pub payload: Option<segment::types::Payload>,
/// Vector of the point
#[serde(skip_serializing_if = "Option::is_none")]
pub vector: Option<VectorStructOutput>,
/// Shard Key
#[serde(skip_serializing_if = "Option::is_none")]
pub shard_key: Option<segment::types::ShardKey>,
#[serde(skip_serializing_if = "Option::is_none")]
pub order_value: Option<segment::data_types::order_by::OrderValue>,
}
/// Vector data separator for named and unnamed modes
/// Unnamed mode:
///
/// {
/// "vector": [1.0, 2.0, 3.0]
/// }
///
/// or named mode:
///
/// {
/// "vector": {
/// "vector": [1.0, 2.0, 3.0],
/// "name": "image-embeddings"
/// }
/// }
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq)]
#[serde(rename_all = "snake_case")]
#[serde(untagged)]
pub enum NamedVectorStruct {
Default(segment::data_types::vectors::DenseVector),
Dense(segment::data_types::vectors::NamedVector),
Sparse(segment::data_types::vectors::NamedSparseVector),
// No support for multi-dense vectors in search
}
#[derive(Deserialize, Serialize, JsonSchema, Clone, Debug, PartialEq)]
#[serde(untagged)]
pub enum OrderByInterface {
Key(JsonPath),
Struct(OrderBy),
}
/// Fusion algorithm allows to combine results of multiple prefetches.
///
/// Available fusion algorithms:
///
/// * `rrf` - Reciprocal Rank Fusion
/// * `dbsf` - Distribution-Based Score Fusion
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum Fusion {
Rrf,
Dbsf,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum VectorInput {
DenseVector(DenseVector),
SparseVector(SparseVector),
MultiDenseVector(MultiDenseVector),
Id(segment::types::PointIdType),
Document(Document),
Image(Image),
Object(InferenceObject),
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct QueryRequestInternal {
/// Sub-requests to perform first. If present, the query will be performed on the results of the prefetch(es).
#[validate(nested)]
#[serde(default, with = "MaybeOneOrMany")]
#[schemars(with = "MaybeOneOrMany<Prefetch>")]
pub prefetch: Option<Vec<Prefetch>>,
/// Query to perform. If missing without prefetches, returns points ordered by their IDs.
#[validate(nested)]
pub query: Option<QueryInterface>,
/// Define which vector name to use for querying. If missing, the default vector is used.
pub using: Option<String>,
/// Filter conditions - return only those points that satisfy the specified conditions.
#[validate(nested)]
pub filter: Option<Filter>,
/// Search params for when there is no prefetch
#[validate(nested)]
pub params: Option<SearchParams>,
/// Return points with scores better than this threshold.
pub score_threshold: Option<ScoreType>,
/// Max number of points to return. Default is 10.
#[validate(range(min = 1))]
pub limit: Option<usize>,
/// Offset of the result. Skip this many points. Default is 0
pub offset: Option<usize>,
/// Options for specifying which vectors to include into the response. Default is false.
pub with_vector: Option<WithVector>,
/// Options for specifying which payload to include or not. Default is false.
pub with_payload: Option<WithPayloadInterface>,
/// The location to use for IDs lookup, if not specified - use the current collection and the 'using' vector
/// Note: the other collection vectors should have the same vector size as the 'using' vector in the current collection
#[serde(default)]
pub lookup_from: Option<LookupLocation>,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct QueryRequest {
#[validate(nested)]
#[serde(flatten)]
pub internal: QueryRequestInternal,
pub shard_key: Option<ShardKeySelector>,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct QueryRequestBatch {
#[validate(nested)]
pub searches: Vec<QueryRequest>,
}
#[derive(Debug, Serialize, JsonSchema)]
pub struct QueryResponse {
pub points: Vec<ScoredPoint>,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum QueryInterface {
Nearest(VectorInput),
Query(Query),
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum Query {
/// Find the nearest neighbors to this vector.
Nearest(NearestQuery),
/// Use multiple positive and negative vectors to find the results.
Recommend(RecommendQuery),
/// Search for nearest points, but constrain the search space with context
Discover(DiscoverQuery),
/// Return points that live in positive areas.
Context(ContextQuery),
/// Order the points by a payload field.
OrderBy(OrderByQuery),
/// Fuse the results of multiple prefetches.
Fusion(FusionQuery),
/// Sample points from the collection, non-deterministically.
Sample(SampleQuery),
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct NearestQuery {
pub nearest: VectorInput,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct RecommendQuery {
pub recommend: RecommendInput,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct DiscoverQuery {
pub discover: DiscoverInput,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct ContextQuery {
pub context: ContextInput,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct OrderByQuery {
pub order_by: OrderByInterface,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct FusionQuery {
pub fusion: Fusion,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct SampleQuery {
pub sample: Sample,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct Prefetch {
/// Sub-requests to perform first. If present, the query will be performed on the results of the prefetches.
#[validate(nested)]
#[serde(default, with = "MaybeOneOrMany")]
#[schemars(with = "MaybeOneOrMany<Prefetch>")]
pub prefetch: Option<Vec<Prefetch>>,
/// Query to perform. If missing without prefetches, returns points ordered by their IDs.
#[validate(nested)]
pub query: Option<QueryInterface>,
/// Define which vector name to use for querying. If missing, the default vector is used.
pub using: Option<String>,
/// Filter conditions - return only those points that satisfy the specified conditions.
#[validate(nested)]
pub filter: Option<Filter>,
/// Search params for when there is no prefetch
#[validate(nested)]
pub params: Option<SearchParams>,
/// Return points with scores better than this threshold.
pub score_threshold: Option<ScoreType>,
/// Max number of points to return. Default is 10.
#[validate(range(min = 1))]
pub limit: Option<usize>,
/// The location to use for IDs lookup, if not specified - use the current collection and the 'using' vector
/// Note: the other collection vectors should have the same vector size as the 'using' vector in the current collection
#[serde(default)]
pub lookup_from: Option<LookupLocation>,
}
/// How to use positive and negative examples to find the results, default is `average_vector`:
///
/// * `average_vector` - Average positive and negative vectors and create a single query
/// with the formula `query = avg_pos + avg_pos - avg_neg`. Then performs normal search.
///
/// * `best_score` - Uses custom search objective. Each candidate is compared against all
/// examples, its score is then chosen from the `max(max_pos_score, max_neg_score)`.
/// If the `max_neg_score` is chosen then it is squared and negated, otherwise it is just
/// the `max_pos_score`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Default, PartialEq, Clone, Copy)]
#[serde(rename_all = "snake_case")]
pub enum RecommendStrategy {
#[default]
AverageVector,
BestScore,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct RecommendInput {
/// Look for vectors closest to the vectors from these points
pub positive: Option<Vec<VectorInput>>,
/// Try to avoid vectors like the vector from these points
pub negative: Option<Vec<VectorInput>>,
/// How to use the provided vectors to find the results
pub strategy: Option<RecommendStrategy>,
}
impl RecommendInput {
pub fn iter(&self) -> impl Iterator<Item = &VectorInput> {
self.positive
.iter()
.flatten()
.chain(self.negative.iter().flatten())
}
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct DiscoverInput {
/// Use this as the primary search objective
#[validate(nested)]
pub target: VectorInput,
/// Search space will be constrained by these pairs of vectors
#[validate(nested)]
#[serde(with = "MaybeOneOrMany")]
#[schemars(with = "MaybeOneOrMany<ContextPair>")]
pub context: Option<Vec<ContextPair>>,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct ContextInput(
/// Search space will be constrained by these pairs of vectors
#[serde(with = "MaybeOneOrMany")]
#[schemars(with = "MaybeOneOrMany<ContextPair>")]
pub Option<Vec<ContextPair>>,
);
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct ContextPair {
/// A positive vector
#[validate(nested)]
pub positive: VectorInput,
/// Repel from this vector
#[validate(nested)]
pub negative: VectorInput,
}
impl ContextPair {
pub fn iter(&self) -> impl Iterator<Item = &VectorInput> {
std::iter::once(&self.positive).chain(std::iter::once(&self.negative))
}
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum Sample {
Random,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct WithLookup {
/// Name of the collection to use for points lookup
#[serde(rename = "collection")]
pub collection_name: String,
/// Options for specifying which payload to include (or not)
#[serde(default = "default_with_payload")]
pub with_payload: Option<WithPayloadInterface>,
/// Options for specifying which vectors to include (or not)
#[serde(alias = "with_vector")]
#[serde(default)]
pub with_vectors: Option<WithVector>,
}
#[allow(clippy::unnecessary_wraps)] // Used as serde default
const fn default_with_payload() -> Option<WithPayloadInterface> {
Some(WithPayloadInterface::Bool(true))
}
#[derive(Serialize, Deserialize, JsonSchema, Debug, Clone, PartialEq)]
#[serde(untagged)]
pub enum WithLookupInterface {
Collection(String),
WithLookup(WithLookup),
}
/// Defines a location to use for looking up the vector.
/// Specifies collection and vector field name.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq)]
#[serde(rename_all = "snake_case")]
pub struct LookupLocation {
/// Name of the collection used for lookup
pub collection: String,
/// Optional name of the vector field within the collection.
/// If not provided, the default vector field will be used.
#[serde(default)]
pub vector: Option<String>,
/// Specify in which shards to look for the points, if not specified - look in all shards
#[serde(default, skip_serializing_if = "Option::is_none")]
pub shard_key: Option<ShardKeySelector>,
}
#[derive(Validate, Serialize, Deserialize, JsonSchema, Debug, Clone, PartialEq)]
pub struct BaseGroupRequest {
/// Payload field to group by, must be a string or number field.
/// If the field contains more than 1 value, all values will be used for grouping.
/// One point can be in multiple groups.
#[schemars(length(min = 1))]
pub group_by: JsonPath,
/// Maximum amount of points to return per group
#[validate(range(min = 1))]
pub group_size: u32,
/// Maximum amount of groups to return
#[validate(range(min = 1))]
pub limit: u32,
/// Look for points in another collection using the group ids
pub with_lookup: Option<WithLookupInterface>,
}
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone)]
pub struct SearchGroupsRequestInternal {
/// Look for vectors closest to this
#[validate(nested)]
pub vector: NamedVectorStruct,
/// Look only for points which satisfies this conditions
#[validate(nested)]
pub filter: Option<Filter>,
/// Additional search params
#[validate(nested)]
pub params: Option<SearchParams>,
/// Select which payload to return with the response. Default is false.
pub with_payload: Option<WithPayloadInterface>,
/// Options for specifying which vectors to include into response. Default is false.
#[serde(default, alias = "with_vectors")]
pub with_vector: Option<WithVector>,
/// Define a minimal score threshold for the result.
/// If defined, less similar results will not be returned.
/// Score of the returned result might be higher or smaller than the threshold depending on the
/// Distance function used. E.g. for cosine similarity only higher scores will be returned.
pub score_threshold: Option<ScoreType>,
#[serde(flatten)]
#[validate(nested)]
pub group_request: BaseGroupRequest,
}
/// Search request.
/// Holds all conditions and parameters for the search of most similar points by vector similarity
/// given the filtering restrictions.
#[derive(Deserialize, Serialize, JsonSchema, Validate, Clone, Debug, PartialEq)]
#[serde(rename_all = "snake_case")]
pub struct SearchRequestInternal {
/// Look for vectors closest to this
#[validate(nested)]
pub vector: NamedVectorStruct,
/// Look only for points which satisfies this conditions
#[validate(nested)]
pub filter: Option<Filter>,
/// Additional search params
#[validate(nested)]
pub params: Option<SearchParams>,
/// Max number of result to return
#[serde(alias = "top")]
#[validate(range(min = 1))]
pub limit: usize,
/// Offset of the first result to return.
/// May be used to paginate results.
/// Note: large offset values may cause performance issues.
pub offset: Option<usize>,
/// Select which payload to return with the response. Default is false.
pub with_payload: Option<WithPayloadInterface>,
/// Options for specifying which vectors to include into response. Default is false.
#[serde(default, alias = "with_vectors")]
pub with_vector: Option<WithVector>,
/// Define a minimal score threshold for the result.
/// If defined, less similar results will not be returned.
/// Score of the returned result might be higher or smaller than the threshold depending on the
/// Distance function used. E.g. for cosine similarity only higher scores will be returned.
pub score_threshold: Option<ScoreType>,
}
#[derive(Validate, Serialize, Deserialize, JsonSchema, Debug, Clone, PartialEq)]
pub struct QueryBaseGroupRequest {
/// Payload field to group by, must be a string or number field.
/// If the field contains more than 1 value, all values will be used for grouping.
/// One point can be in multiple groups.
#[schemars(length(min = 1))]
pub group_by: JsonPath,
/// Maximum amount of points to return per group. Default is 3.
#[validate(range(min = 1))]
pub group_size: Option<usize>,
/// Maximum amount of groups to return. Default is 10.
#[validate(range(min = 1))]
pub limit: Option<usize>,
/// Look for points in another collection using the group ids
pub with_lookup: Option<WithLookupInterface>,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct QueryGroupsRequestInternal {
/// Sub-requests to perform first. If present, the query will be performed on the results of the prefetch(es).
#[validate(nested)]
#[serde(default, with = "MaybeOneOrMany")]
#[schemars(with = "MaybeOneOrMany<Prefetch>")]
pub prefetch: Option<Vec<Prefetch>>,
/// Query to perform. If missing without prefetches, returns points ordered by their IDs.
#[validate(nested)]
pub query: Option<QueryInterface>,
/// Define which vector name to use for querying. If missing, the default vector is used.
pub using: Option<String>,
/// Filter conditions - return only those points that satisfy the specified conditions.
#[validate(nested)]
pub filter: Option<Filter>,
/// Search params for when there is no prefetch
#[validate(nested)]
pub params: Option<SearchParams>,
/// Return points with scores better than this threshold.
pub score_threshold: Option<ScoreType>,
/// Options for specifying which vectors to include into the response. Default is false.
pub with_vector: Option<WithVector>,
/// Options for specifying which payload to include or not. Default is false.
pub with_payload: Option<WithPayloadInterface>,
/// The location to use for IDs lookup, if not specified - use the current collection and the 'using' vector
/// Note: the other collection vectors should have the same vector size as the 'using' vector in the current collection
#[serde(default)]
pub lookup_from: Option<LookupLocation>,
#[serde(flatten)]
#[validate(nested)]
pub group_request: QueryBaseGroupRequest,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct QueryGroupsRequest {
#[validate(nested)]
#[serde(flatten)]
pub search_group_request: QueryGroupsRequestInternal,
pub shard_key: Option<ShardKeySelector>,
}
#[derive(Serialize, Deserialize, JsonSchema, Validate, Debug, PartialEq)]
#[serde(rename_all = "snake_case")]
pub struct SearchMatrixRequestInternal {
/// Look only for points which satisfies this conditions
#[validate(nested)]
pub filter: Option<Filter>,
/// How many points to select and search within. Default is 10.
#[validate(range(min = 2))]
pub sample: Option<usize>,
/// How many neighbours per sample to find. Default is 3.
#[validate(range(min = 1))]
pub limit: Option<usize>,
/// Define which vector name to use for querying. If missing, the default vector is used.
pub using: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
#[serde(rename_all = "snake_case")]
pub struct SearchMatrixRequest {
#[serde(flatten)]
#[validate(nested)]
pub search_request: SearchMatrixRequestInternal,
/// Specify in which shards to look for the points, if not specified - look in all shards
#[serde(default, skip_serializing_if = "Option::is_none")]
pub shard_key: Option<ShardKeySelector>,
}
#[derive(Debug, Serialize, JsonSchema, PartialEq)]
#[serde(rename_all = "snake_case")]
pub struct SearchMatrixOffsetsResponse {
/// Row indices of the matrix
pub offsets_row: Vec<u64>,
/// Column indices of the matrix
pub offsets_col: Vec<u64>,
/// Scores associated with matrix coordinates
pub scores: Vec<ScoreType>,
/// Ids of the points in order
pub ids: Vec<PointIdType>,
}
#[derive(Debug, Serialize, JsonSchema, PartialEq)]
#[serde(rename_all = "snake_case")]
/// Pair of points (a, b) with score
pub struct SearchMatrixPair {
pub a: PointIdType,
pub b: PointIdType,
pub score: ScoreType,
}
impl SearchMatrixPair {
pub fn new(a: impl Into<PointIdType>, b: impl Into<PointIdType>, score: ScoreType) -> Self {
Self {
a: a.into(),
b: b.into(),
score,
}
}
}
#[derive(Debug, Serialize, JsonSchema, PartialEq)]
#[serde(rename_all = "snake_case")]
pub struct SearchMatrixPairsResponse {
/// List of pairs of points with scores
pub pairs: Vec<SearchMatrixPair>,
}
#[derive(Debug, JsonSchema, Serialize, Deserialize, Validate)]
pub struct FacetRequestInternal {
/// Payload key to use for faceting.
pub key: JsonPath,
/// Max number of hits to return. Default is 10.
#[validate(range(min = 1))]
pub limit: Option<usize>,
/// Filter conditions - only consider points that satisfy these conditions.
pub filter: Option<Filter>,
/// Whether to do a more expensive exact count for each of the values in the facet. Default is false.
pub exact: Option<bool>,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct FacetRequest {
#[validate(nested)]
#[serde(flatten)]
pub facet_request: FacetRequestInternal,
pub shard_key: Option<ShardKeySelector>,
}
#[derive(Debug, Serialize, JsonSchema)]
#[serde(untagged)]
pub enum FacetValue {
String(String),
Integer(IntPayloadType),
Bool(bool),
}
#[derive(Debug, Serialize, JsonSchema)]
pub struct FacetValueHit {
pub value: FacetValue,
pub count: usize,
}
#[derive(Debug, Serialize, JsonSchema)]
pub struct FacetResponse {
pub hits: Vec<FacetValueHit>,
}
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, Validate)]
#[serde(rename_all = "snake_case")]
pub struct PointStruct {
/// Point id
pub id: PointIdType,
/// Vectors
#[serde(alias = "vectors")]
#[validate(nested)]
pub vector: VectorStruct,
/// Payload values (optional)
pub payload: Option<Payload>,
}
#[derive(Debug, Deserialize, Serialize, Clone, Validate, JsonSchema)]
pub struct PointsBatch {
#[validate(nested)]
pub batch: Batch,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub shard_key: Option<ShardKeySelector>,
}
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)]
pub struct PointVectors {
/// Point id
pub id: PointIdType,
/// Vectors
#[serde(alias = "vectors")]
pub vector: VectorStruct,
}
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone)]
pub struct UpdateVectors {
/// Points with named vectors
#[validate(nested)]
#[validate(length(min = 1, message = "must specify points to update"))]
pub points: Vec<PointVectors>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub shard_key: Option<ShardKeySelector>,
}
#[derive(Debug, Deserialize, Serialize, Clone, JsonSchema, Validate)]
pub struct PointsList {
#[validate(nested)]
pub points: Vec<PointStruct>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub shard_key: Option<ShardKeySelector>,
}
impl<'de> serde::Deserialize<'de> for PointInsertOperations {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let value = serde_json::Value::deserialize(deserializer)?;
match value {
serde_json::Value::Object(map) => {
if map.contains_key("batch") {
PointsBatch::deserialize(serde_json::Value::Object(map))
.map(PointInsertOperations::PointsBatch)
.map_err(serde::de::Error::custom)
} else if map.contains_key("points") {
PointsList::deserialize(serde_json::Value::Object(map))
.map(PointInsertOperations::PointsList)
.map_err(serde::de::Error::custom)
} else {
Err(serde::de::Error::custom(
"Invalid PointInsertOperations format",
))
}
}
_ => Err(serde::de::Error::custom(
"Invalid PointInsertOperations format",
)),
}
}
}
#[derive(Debug, Serialize, Clone, JsonSchema)]
#[serde(untagged)]
pub enum PointInsertOperations {
/// Inset points from a batch.
PointsBatch(PointsBatch),
/// Insert points from a list
PointsList(PointsList),
}
impl Validate for PointInsertOperations {
fn validate(&self) -> Result<(), ValidationErrors> {
match self {
PointInsertOperations::PointsBatch(batch) => batch.validate(),
PointInsertOperations::PointsList(list) => list.validate(),
}
}
}