File size: 1,873 Bytes
84d2a97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
use std::ops::Range;

use rand::Rng;

use crate::common::sparse_vector::SparseVector;

/// Half-open interval from which the values of the generated vectors are sampled.
const VALUE_RANGE: Range<f64> = -100.0..100.0;
/// Upper bound on the number of values produced by `random_sparse_vector`.
// Realistic sizing based on experiences with SPLADE
const MAX_VALUES_PER_VECTOR: usize = 300;

/// Generates a non-empty random sparse vector.
///
/// A candidate length `size` is drawn from `1..max_dim_size`. Every index in
/// `1..=size` is then kept with a probability of 2%, and each kept index gets a
/// value sampled from `VALUE_RANGE`. Generation stops once
/// `MAX_VALUES_PER_VECTOR` entries have been produced. If no index was kept, a
/// single entry with an index drawn from `1..max_dim_size` is inserted, so the
/// result always holds at least one value.
///
/// # Panics
///
/// Panics if `max_dim_size < 2`, because `1..max_dim_size` then contains no
/// index to sample from, or if `SparseVector::try_from` rejects the generated
/// entries.
pub fn random_sparse_vector<R: Rng + ?Sized>(rnd_gen: &mut R, max_dim_size: usize) -> SparseVector {
    // Fail early with an actionable message instead of the generic empty-range
    // panic that `gen_range` would raise below.
    assert!(
        max_dim_size > 1,
        "max_dim_size must be at least 2 to generate a non-empty sparse vector, got {}",
        max_dim_size,
    );

    let size = rnd_gen.gen_range(1..max_dim_size);
    let mut tuples: Vec<(u32, f32)> = Vec::new();

    for i in 1..=size {
        // make sure the vector is not too large (for performance reasons)
        if tuples.len() == MAX_VALUES_PER_VECTOR {
            break;
        }
        // high probability of skipping a dimension to make the vectors more sparse
        let skip = rnd_gen.gen_bool(0.98);
        if !skip {
            tuples.push((i as u32, rnd_gen.gen_range(VALUE_RANGE) as f32));
        }
    }

    // make sure the vector holds at least one value
    if tuples.is_empty() {
        tuples.push((
            rnd_gen.gen_range(1..max_dim_size) as u32,
            rnd_gen.gen_range(VALUE_RANGE) as f32,
        ));
    }

    SparseVector::try_from(tuples).unwrap()
}

/// Generates a dense-like sparse vector: every index in `1..=max_size` is
/// present, each paired with a value sampled from `VALUE_RANGE`.
///
/// # Panics
///
/// Panics if `SparseVector::try_from` rejects the generated entries.
pub fn random_full_sparse_vector<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    max_size: usize,
) -> SparseVector {
    // One random draw per index, in ascending index order.
    let entries: Vec<(u32, f32)> = (1..=max_size)
        .map(|dim| (dim as u32, rnd_gen.gen_range(VALUE_RANGE) as f32))
        .collect();

    SparseVector::try_from(entries).unwrap()
}

/// Generates a non-empty random sparse vector without negative values.
///
/// The vector is produced by `random_sparse_vector` and every value is then
/// replaced by its absolute value; the indices are left untouched.
pub fn random_positive_sparse_vector<R: Rng + ?Sized>(
    rnd_gen: &mut R,
    max_dim_size: usize,
) -> SparseVector {
    let mut sparse = random_sparse_vector(rnd_gen, max_dim_size);
    // Flip the sign of negative values in place.
    sparse.values.iter_mut().for_each(|v| *v = v.abs());
    sparse
}