colibri.qdrant / lib /segment /src /fixtures /payload_fixtures.rs
Gouzi Mohaled
Ajout du dossier lib
84d2a97
use std::ops::{Range, RangeInclusive};
use fnv::FnvBuildHasher;
use indexmap::IndexSet;
use itertools::Itertools;
use rand::distributions::{Alphanumeric, DistString};
use rand::seq::SliceRandom;
use rand::Rng;
use serde_json::{json, Value};
use crate::data_types::vectors::{DenseVector, MultiDenseVectorInternal, VectorElementType};
use crate::types::{
AnyVariants, Condition, ExtendedPointId, FieldCondition, Filter, HasIdCondition,
IsEmptyCondition, Match, MatchAny, Payload, PayloadField, Range as RangeCondition, ValuesCount,
};
/// Adjective vocabulary for the random text fixtures.
///
/// `random_adj` returns one of these words on its own, and `random_keyword`
/// uses one as the first half of an "adjective noun" pair.
const ADJECTIVE: &[&str] = &[
"jobless",
"rightful",
"breakable",
"impartial",
"shocking",
"faded",
"phobic",
"overt",
"like",
"wide-eyed",
"broad",
];
/// Noun vocabulary for the random text fixtures.
///
/// `random_keyword` uses one of these words as the second half of an
/// "adjective noun" pair.
const NOUN: &[&str] = &[
"territory",
"jam",
"neck",
"chicken",
"cap",
"kiss",
"veil",
"trail",
"size",
"digestion",
"rod",
"seed",
];
/// Half-open range that random integer payload values and the bounds of random
/// range conditions are sampled from.
const INT_RANGE: Range<i64> = 0..500;
/// Half-open range that the `"lon"` coordinate of random geo payloads is sampled from.
pub const LON_RANGE: Range<f64> = -180.0..180.0;
/// Half-open range that the `"lat"` coordinate of random geo payloads is sampled from.
pub const LAT_RANGE: Range<f64> = -90.0..90.0;
/// Payload key holding random keyword(s); also the root of the plain nested payload.
pub const STR_KEY: &str = "kvd";
/// Payload key whose `nested_1` entry is an array of objects in the nested payload.
pub const STR_PROJ_KEY: &str = "kvd_proj";
/// Payload key whose value is itself an array of objects in the nested payload.
pub const STR_ROOT_PROJ_KEY: &str = "kvd_root_proj";
/// Payload key holding a list of random integers; target of random range conditions.
pub const INT_KEY: &str = "int";
/// Second payload key holding a list of random integers.
pub const INT_KEY_2: &str = "int2";
/// Third payload key holding a list of random integers.
pub const INT_KEY_3: &str = "int3";
/// Payload key holding a single random float.
pub const FLT_KEY: &str = "flt";
/// Payload key that is only present in one of the two shapes produced by `random_json`.
pub const FLICKING_KEY: &str = "flicking";
/// Payload key holding a list of random `{lon, lat}` objects.
pub const GEO_KEY: &str = "geo";
/// Payload key holding a single random keyword string; target of text match conditions.
pub const TEXT_KEY: &str = "text";
/// Payload key holding a list of random booleans.
pub const BOOL_KEY: &str = "bool";
/// Returns one randomly chosen entry of `ADJECTIVE` as an owned `String`.
pub fn random_adj<R: Rng + ?Sized>(rnd_gen: &mut R) -> String {
    // `choose` only yields `None` for an empty slice; `ADJECTIVE` is non-empty.
    let picked: &str = ADJECTIVE.choose(rnd_gen).unwrap();
    picked.to_owned()
}
/// Builds a random two-word keyword of the form `"<adjective> <noun>"`.
///
/// The adjective is drawn first, then the noun, so the generator is advanced
/// by exactly two `choose` calls.
pub fn random_keyword<R: Rng + ?Sized>(rnd_gen: &mut R) -> String {
    let adjective: &str = ADJECTIVE.choose(rnd_gen).unwrap();
    let noun: &str = NOUN.choose(rnd_gen).unwrap();
    [adjective, noun].join(" ")
}
/// Generates a keyword payload value with a random number of keywords.
///
/// A count is sampled from `num_values`. When the count is greater than one the
/// result is a JSON array with that many keyword strings; otherwise (count of
/// zero or one) the result is a single JSON string.
pub fn random_keyword_payload<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    num_values: RangeInclusive<usize>,
) -> Value {
    let count = rnd_gen.gen_range(num_values);

    // Zero or one value collapses to a bare string rather than an array.
    if count <= 1 {
        return Value::String(random_keyword(rnd_gen));
    }

    let mut keywords = Vec::with_capacity(count);
    for _ in 0..count {
        keywords.push(Value::String(random_keyword(rnd_gen)));
    }
    Value::Array(keywords)
}
/// Generates a list of random integers drawn from `INT_RANGE`.
///
/// The list length is itself sampled from `num_values`.
pub fn random_int_payload<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    num_values: RangeInclusive<usize>,
) -> Vec<i64> {
    let count = rnd_gen.gen_range(num_values);
    std::iter::repeat_with(|| rnd_gen.gen_range(INT_RANGE))
        .take(count)
        .collect()
}
pub fn random_geo_payload<R: Rng + ?Sized>(
rnd_gen: &mut R,
num_values: RangeInclusive<usize>,
) -> Vec<Value> {
(0..rnd_gen.gen_range(num_values))
.map(|_| {
json!( {
"lon": rnd_gen.gen_range(LON_RANGE),
"lat": rnd_gen.gen_range(LAT_RANGE),
})
})
.collect_vec()
}
/// Generates a list of random JSON booleans whose length is sampled from `num_values`.
pub fn random_bool_payload<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    num_values: RangeInclusive<usize>,
) -> Vec<Value> {
    let count = rnd_gen.gen_range(num_values);
    std::iter::repeat_with(|| {
        let flag: bool = rnd_gen.gen();
        Value::Bool(flag)
    })
    .take(count)
    .collect()
}
/// Generates a dense vector with `size` elements, each produced by `rnd_gen.gen()`.
pub fn random_vector<R: Rng + ?Sized>(rnd_gen: &mut R, size: usize) -> DenseVector {
    std::iter::repeat_with(|| rnd_gen.gen())
        .take(size)
        .collect()
}
/// Generates a dense vector with `size` elements that look like byte values.
///
/// Every element is sampled from `0.0..=255.0` and then rounded, so it is a
/// whole number stored as a `VectorElementType`.
pub fn random_dense_byte_vector<R: Rng + ?Sized>(rnd_gen: &mut R, size: usize) -> DenseVector {
    let mut sample_byte = || {
        let raw = rnd_gen.gen_range::<VectorElementType, _>(0.0..=255.0);
        raw.round()
    };
    (0..size).map(|_| sample_byte()).collect()
}
/// Generates a multi-vector made of `num_vector_per_points` random vectors.
///
/// Each inner vector has `vector_size` elements and is produced by
/// `random_vector`. The inner vectors are concatenated into one flat buffer,
/// which is handed to `MultiDenseVectorInternal::new` together with `vector_size`.
pub fn random_multi_vector<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    vector_size: usize,
    num_vector_per_points: usize,
) -> MultiDenseVectorInternal {
    let flattened: Vec<_> = (0..num_vector_per_points)
        .flat_map(|_| random_vector(rnd_gen, vector_size))
        .collect();
    MultiDenseVectorInternal::new(flattened, vector_size)
}
/// Generates one of four less frequently used condition kinds, picked at random:
///
/// * values-count on `STR_KEY` with `gte = 3`,
/// * values-count on `STR_KEY` with `lte = 2`,
/// * has-id with between 10 and 49 draws of numeric ids from `0..1000`,
/// * is-empty on `FLICKING_KEY`.
pub fn random_uncommon_condition<R: Rng + ?Sized>(rnd_gen: &mut R) -> Condition {
    let variant = rnd_gen.gen_range(0..=3);
    match variant {
        0 | 1 => {
            // Both values-count variants differ only in which bound is set.
            let (at_least, at_most) = if variant == 0 {
                (Some(3), None)
            } else {
                (None, Some(2))
            };
            let bounds = ValuesCount {
                lt: None,
                gt: None,
                gte: at_least,
                lte: at_most,
            };
            Condition::Field(FieldCondition::new_values_count(
                STR_KEY.parse().unwrap(),
                bounds,
            ))
        }
        2 => {
            // The number of draws is sampled first, then each id.
            let id_draws = rnd_gen.gen_range(10..50);
            let has_id = (0..id_draws)
                .map(|_| ExtendedPointId::NumId(rnd_gen.gen_range(0..1000)))
                .collect();
            Condition::HasId(HasIdCondition { has_id })
        }
        3 => {
            let is_empty = PayloadField {
                key: FLICKING_KEY.parse().unwrap(),
            };
            Condition::IsEmpty(IsEmptyCondition { is_empty })
        }
        _ => unreachable!(),
    }
}
/// Generates a random "simple" field condition.
///
/// A first coin flip picks between a string-based and an integer-based
/// condition:
///
/// * string-based: a second coin flip picks either a match on `STR_KEY`
///   against a random keyword, or a text match on `TEXT_KEY` against a random
///   adjective;
/// * integer-based: a range on `INT_KEY` whose `gte` and `lte` bounds are drawn
///   independently from `INT_RANGE` (so `gte` may be larger than `lte`).
pub fn random_simple_condition<R: Rng + ?Sized>(rnd_gen: &mut R) -> Condition {
    let on_string_field: bool = rnd_gen.gen();

    if !on_string_field {
        // Lower bound is drawn before the upper bound.
        let lower_bound = rnd_gen.gen_range(INT_RANGE) as f64;
        let upper_bound = rnd_gen.gen_range(INT_RANGE) as f64;
        let bounds = RangeCondition {
            lt: None,
            gt: None,
            gte: Some(lower_bound),
            lte: Some(upper_bound),
        };
        return Condition::Field(FieldCondition::new_range(
            INT_KEY.parse().unwrap(),
            bounds,
        ));
    }

    let on_keyword_field: bool = rnd_gen.gen();
    let field_condition = if on_keyword_field {
        FieldCondition::new_match(STR_KEY.parse().unwrap(), random_keyword(rnd_gen).into())
    } else {
        FieldCondition::new_match(
            TEXT_KEY.parse().unwrap(),
            Match::Text(random_adj(rnd_gen).into()),
        )
    };
    Condition::Field(field_condition)
}
/// Generates a random condition, choosing between the simple and uncommon generators.
///
/// A number is drawn from `0..100`; values below 80 select
/// `random_simple_condition`, the rest select `random_uncommon_condition`.
pub fn random_condition<R: Rng + ?Sized>(rnd_gen: &mut R) -> Condition {
    match rnd_gen.gen_range(0..100) {
        0..=79 => random_simple_condition(rnd_gen),
        _ => random_uncommon_condition(rnd_gen),
    }
}
/// Builds a filter whose `must` clause holds `num_conditions` random simple conditions.
///
/// All other clauses are left unset. With `num_conditions == 0` the `must`
/// clause is `Some` of an empty list.
pub fn random_must_filter<R: Rng + ?Sized>(rnd_gen: &mut R, num_conditions: usize) -> Filter {
    let conditions: Vec<_> = std::iter::repeat_with(|| random_simple_condition(rnd_gen))
        .take(num_conditions)
        .collect();

    Filter {
        should: None,
        min_should: None,
        must: Some(conditions),
        must_not: None,
    }
}
/// Builds a filter with a single `must` condition: a match-any on `STR_KEY`.
///
/// The candidate set is assembled from up to `len` generated strings:
///
/// * `num_existing = len * percent_existing / 100` (truncated) of them come
///   from `random_keyword`, the same generator used for keyword payloads;
/// * the remaining `len - num_existing` are random alphanumeric strings of
///   1 to 14 characters.
///
/// The alphanumeric strings are generated first, then the keywords. Because the
/// values are collected into a set, duplicates collapse and the final set may
/// contain fewer than `len` entries.
///
/// `percent_existing` is expected to be in `0.0..=100.0`. Values above 100, or
/// an `f32` product that rounds above `len`, are clamped so that at most `len`
/// keywords are requested; previously this underflowed `len - num_existing`.
pub fn random_match_any_filter<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    len: usize,
    percent_existing: f32,
) -> Filter {
    // Float-to-int `as` saturates (negative/NaN -> 0); `.min(len)` caps the top
    // end so the subtraction below can never underflow.
    let num_existing = ((len as f32 * (percent_existing / 100.0)) as usize).min(len);
    let num_random = len - num_existing;

    let mut values: IndexSet<String, FnvBuildHasher> = (0..num_random)
        .map(|_| {
            let slen = rnd_gen.gen_range(1..15);
            Alphanumeric.sample_string(rnd_gen, slen)
        })
        .collect();
    values.extend((0..num_existing).map(|_| random_keyword(rnd_gen)));

    Filter {
        should: None,
        must: Some(vec![Condition::Field(FieldCondition::new_match(
            STR_KEY.parse().unwrap(),
            Match::Any(MatchAny {
                any: AnyVariants::Strings(values),
            }),
        ))]),
        must_not: None,
        min_should: None,
    }
}
pub fn random_filter<R: Rng + ?Sized>(rnd_gen: &mut R, total_conditions: usize) -> Filter {
let num_should = rnd_gen.gen_range(0..=total_conditions);
let num_must = total_conditions - num_should;
let should_conditions = (0..num_should)
.map(|_| random_condition(rnd_gen))
.collect_vec();
let should_conditions_opt = if !should_conditions.is_empty() {
Some(should_conditions)
} else {
None
};
let must_conditions = (0..num_must)
.map(|_| random_condition(rnd_gen))
.collect_vec();
let must_conditions_opt = if !must_conditions.is_empty() {
Some(must_conditions)
} else {
None
};
Filter {
should: should_conditions_opt,
min_should: None,
must: must_conditions_opt,
must_not: None,
}
}
/// Builds a `should` filter that matches a random keyword on a nested path.
///
/// A coin flip selects the path: either `"<STR_KEY>.nested_1.nested_2"` or the
/// array-projection form `"<STR_PROJ_KEY>.nested_1[].nested_2"`.
pub fn random_nested_filter<R: Rng + ?Sized>(rnd_gen: &mut R) -> Filter {
    let use_plain_path: bool = rnd_gen.gen();
    let nested_path = match use_plain_path {
        true => format!("{STR_KEY}.nested_1.nested_2"),
        false => format!("{STR_PROJ_KEY}.nested_1[].nested_2"),
    };

    let field_condition =
        FieldCondition::new_match(nested_path.parse().unwrap(), random_keyword(rnd_gen).into());
    Filter::new_should(Condition::Field(field_condition))
}
fn random_json<R: Rng + ?Sized>(rnd_gen: &mut R) -> Value {
if rnd_gen.gen_range(0.0..1.0) < 0.5 {
json!({
STR_KEY: random_keyword_payload(rnd_gen, 1..=3),
INT_KEY: random_int_payload(rnd_gen, 1..=3),
INT_KEY_2: random_int_payload(rnd_gen, 1..=2),
INT_KEY_3: random_int_payload(rnd_gen, 1..=2),
FLT_KEY: rnd_gen.gen_range(0.0..10.0),
GEO_KEY: random_geo_payload(rnd_gen, 1..=3),
TEXT_KEY: random_keyword_payload(rnd_gen, 1..=1),
BOOL_KEY: random_bool_payload(rnd_gen, 1..=1),
})
} else {
json!({
STR_KEY: random_keyword_payload(rnd_gen, 1..=2),
INT_KEY: random_int_payload(rnd_gen, 1..=3),
INT_KEY_2: random_int_payload(rnd_gen, 1..=2),
INT_KEY_3: random_int_payload(rnd_gen, 1..=2),
FLT_KEY: rnd_gen.gen_range(0.0..10.0),
GEO_KEY: random_geo_payload(rnd_gen, 1..=3),
TEXT_KEY: random_keyword_payload(rnd_gen, 1..=1),
BOOL_KEY: random_bool_payload(rnd_gen, 1..=2),
FLICKING_KEY: random_int_payload(rnd_gen, 1..=3)
})
}
}
/// Generates a random flat payload by converting the output of `random_json` into a `Payload`.
pub fn generate_diverse_payload<R: Rng + ?Sized>(rnd_gen: &mut R) -> Payload {
    let document = random_json(rnd_gen);
    document.into()
}
pub fn generate_diverse_nested_payload<R: Rng + ?Sized>(rnd_gen: &mut R) -> Payload {
json!({
STR_KEY: {
"nested_1": {
"nested_2": random_keyword_payload(rnd_gen, 1..=3)
}
},
STR_PROJ_KEY: {
"nested_1": [
{ "nested_2": random_keyword_payload(rnd_gen, 1..=3) }
]
},
STR_ROOT_PROJ_KEY: [
{
"nested_1": [
{ "nested_2": random_keyword_payload(rnd_gen, 1..=3) }
]
}
],
})
.into()
}