Spaces:
Build error
Build error
File size: 1,873 Bytes
84d2a97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
use std::ops::Range;
use rand::Rng;
use crate::common::sparse_vector::SparseVector;
/// Half-open interval that random values are sampled from as `f64`; the generators in this
/// file narrow each sampled value to `f32` before storing it.
const VALUE_RANGE: Range<f64> = -100.0..100.0;
/// Upper bound on the number of entries `random_sparse_vector` collects before it stops
/// scanning dimensions.
// Realistic sizing based on experiences with SPLADE
const MAX_VALUES_PER_VECTOR: usize = 300;
/// Generates a non-empty sparse vector with random indices and values.
///
/// A length is drawn from `1..max_dim_size`, then every index in `1..=length` is kept with
/// probability 0.02 until `MAX_VALUES_PER_VECTOR` entries have been collected. Each kept
/// index gets a value sampled from `VALUE_RANGE` and narrowed to `f32`. If no index was kept,
/// a single entry with an index drawn from `1..max_dim_size` is inserted so the result is
/// never empty.
///
/// # Panics
///
/// Panics if `max_dim_size < 2`, because no index can be drawn from `1..max_dim_size`, or if
/// converting the generated entries into a `SparseVector` fails.
pub fn random_sparse_vector<R: Rng + ?Sized>(rnd_gen: &mut R, max_dim_size: usize) -> SparseVector {
    // Fail early with a descriptive message instead of an opaque "empty range" panic
    // raised from inside the random number generator.
    assert!(
        max_dim_size > 1,
        "max_dim_size must be at least 2 to sample from 1..max_dim_size, got {}",
        max_dim_size
    );

    let size = rnd_gen.gen_range(1..max_dim_size);
    let mut tuples: Vec<(u32, f32)> = vec![];

    for i in 1..=size {
        // make sure the vector is not too large (for performance reasons)
        if tuples.len() == MAX_VALUES_PER_VECTOR {
            break;
        }
        // high probability of skipping a dimension to make the vectors more sparse
        let skip = rnd_gen.gen_bool(0.98);
        if !skip {
            tuples.push((i as u32, rnd_gen.gen_range(VALUE_RANGE) as f32));
        }
    }

    // make sure the vector has at least one entry
    if tuples.is_empty() {
        tuples.push((
            rnd_gen.gen_range(1..max_dim_size) as u32,
            rnd_gen.gen_range(VALUE_RANGE) as f32,
        ));
    }

    SparseVector::try_from(tuples).unwrap()
}
/// Builds a sparse vector that has an entry for every index in `1..=max_size`.
///
/// Each value is sampled from `VALUE_RANGE` and narrowed to `f32`.
///
/// # Panics
///
/// Panics if converting the generated entries into a `SparseVector` fails.
pub fn random_full_sparse_vector<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    max_size: usize,
) -> SparseVector {
    let entries: Vec<(u32, f32)> = (1..=max_size)
        .map(|dim| {
            let value = rnd_gen.gen_range(VALUE_RANGE) as f32;
            (dim as u32, value)
        })
        .collect();

    SparseVector::try_from(entries).unwrap()
}
/// Generates a random sparse vector whose values are all non-negative.
///
/// Delegates to `random_sparse_vector` and then replaces every stored value with its
/// absolute value; the indices are left as generated.
pub fn random_positive_sparse_vector<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    max_dim_size: usize,
) -> SparseVector {
    let mut sparse = random_sparse_vector(rnd_gen, max_dim_size);
    // Flip the sign of negative values in place.
    sparse.values.iter_mut().for_each(|v| *v = v.abs());
    sparse
}
|