Gouzi Mohaled
Ajout du dossier lib
84d2a97
raw
history blame
2.25 kB
use permutation_iterator::Permutor;
/// Upper bound on how many vectors `find_quantile_interval` samples from its input.
pub const QUANTILE_SAMPLE_SIZE: usize = 100_000;
/// Scans every component of every vector yielded by `iter` and returns `(min, max)`.
///
/// The running bounds start at `(f32::MAX, f32::MIN)`, so that pair is returned
/// unchanged when `iter` yields no components at all. NaN components satisfy
/// neither comparison and therefore never replace a bound.
pub(crate) fn find_min_max_from_iter<'a>(
    iter: impl Iterator<Item = impl AsRef<[f32]> + 'a> + Clone,
) -> (f32, f32) {
    let mut lowest = f32::MAX;
    let mut highest = f32::MIN;

    for vector in iter {
        for &component in vector.as_ref() {
            if component < lowest {
                lowest = component;
            }
            if component > highest {
                highest = component;
            }
        }
    }

    (lowest, highest)
}
/// Estimates the `(min, max)` range of the sampled values after trimming both tails.
///
/// At most [`QUANTILE_SAMPLE_SIZE`] vectors are picked, at the positions produced by
/// `Permutor::new(count)`. All components of the picked vectors are pooled, the
/// largest and smallest values are cut off according to `quantile`, and the minimum
/// and maximum of what remains are returned.
///
/// * `vector_data` - iterator over the vectors; it is walked once, in order.
/// * `dim` - expected number of components per vector (used only to pre-size the buffer).
/// * `count` - number of vectors the sample positions are drawn from.
/// * `quantile` - fraction of the data to keep; values `>= 1.0` disable trimming.
///
/// Returns `None` when
/// * `count < 127` or `quantile >= 1.0`,
/// * fewer than 4 values were collected, or
/// * fewer than 2 values remain after trimming.
pub(crate) fn find_quantile_interval<'a>(
    vector_data: impl Iterator<Item = impl AsRef<[f32]> + 'a> + Clone,
    dim: usize,
    count: usize,
    quantile: f32,
) -> Option<(f32, f32)> {
    if count < 127 || quantile >= 1.0 {
        return None;
    }

    // Choose which vector positions to sample, then sort them so they can be
    // matched against the input in a single forward pass.
    let slice_size = std::cmp::min(count, QUANTILE_SAMPLE_SIZE);
    let permutor = Permutor::new(count as u64);
    let mut selected_vectors: Vec<usize> =
        permutor.map(|i| i as usize).take(slice_size).collect();
    selected_vectors.sort_unstable();

    // Pool the components of every selected vector into one flat buffer.
    let mut data_slice = Vec::with_capacity(slice_size * dim);
    let mut selected_index: usize = 0;
    for (vector_index, vector) in vector_data.enumerate() {
        if vector_index == selected_vectors[selected_index] {
            data_slice.extend_from_slice(vector.as_ref());
            selected_index += 1;
            if selected_index == slice_size {
                // Every selected position has been consumed.
                break;
            }
        }
    }

    let data_slice_len = data_slice.len();
    if data_slice_len < 4 {
        return None;
    }

    // Number of values to cut from each tail: at least 1, and never so many that
    // the two cuts could meet in the middle of the buffer.
    // NOTE(review): the cut is derived from the number of sampled vectors
    // (`slice_size`), not from the number of pooled values (`data_slice_len`) —
    // confirm this is intended.
    let cut_index = std::cmp::min(
        (data_slice_len - 1) / 2,
        (slice_size as f32 * (1.0 - quantile) / 2.0) as usize,
    );
    let cut_index = std::cmp::max(cut_index, 1);

    // `select_nth_unstable_by` needs a total order and may panic or give an
    // unspecified result otherwise. `partial_cmp` is not total when NaN is
    // present, so `total_cmp` is used: it orders NaNs at the extremes, where the
    // trimming below removes them first.
    // Drop the `cut_index` largest values...
    let (lower_part, _, _) =
        data_slice.select_nth_unstable_by(data_slice_len - cut_index, f32::total_cmp);
    // ...then the `cut_index + 1` smallest values.
    let (_, _, trimmed) = lower_part.select_nth_unstable_by(cut_index, f32::total_cmp);

    if trimmed.len() < 2 {
        return None;
    }

    Some(find_min_max_from_iter(std::iter::once(&*trimmed)))
}