Spaces:
Build error
Build error
File size: 5,992 Bytes
84d2a97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
use std::collections::BTreeSet;
use std::sync::Arc;
use std::time::Duration;
use futures::future;
use futures::future::try_join_all;
use itertools::{process_results, Itertools};
use segment::data_types::facets::{FacetParams, FacetValue, FacetValueHit};
use segment::types::{Condition, FieldCondition, Filter, Match};
use tokio::runtime::Handle;
use tokio::time::error::Elapsed;
use super::LocalShard;
use crate::collection_manager::holders::segment_holder::LockedSegment;
use crate::common::stopping_guard::StoppingGuard;
use crate::operations::types::{CollectionError, CollectionResult};
impl LocalShard {
/// Returns values with approximate counts for the given facet request.
///
/// Each segment is read on a blocking task spawned on `search_runtime_handle`, and the
/// per-segment counts are added together per value. Nothing here reconciles points across
/// segments, hence "approximate".
///
/// `timeout` falls back to `shared_storage_config.search_timeout` when `None`.
///
/// # Errors
///
/// Returns a timeout error if the segment reads do not finish within the timeout, and
/// propagates a failed blocking task or a failed segment read.
pub async fn approx_facet(
    &self,
    request: Arc<FacetParams>,
    search_runtime_handle: &Handle,
    timeout: Option<Duration>,
) -> CollectionResult<Vec<FacetValueHit>> {
    let timeout = timeout.unwrap_or(self.shared_storage_config.search_timeout);

    // The flag obtained from this guard is handed to every segment read.
    // NOTE(review): presumably the guard raises the flag when dropped, so that reads still
    // running after an early return can stop — confirm against `StoppingGuard`.
    let stopping_guard = StoppingGuard::new();

    let spawn_read = |segment: LockedSegment| {
        let request = Arc::clone(&request);
        let is_stopped = stopping_guard.get_is_stopped();

        search_runtime_handle.spawn_blocking(move || {
            let get_segment = segment.get();
            let read_segment = get_segment.read();
            read_segment.facet(&request, &is_stopped)
        })
    };

    let all_reads = {
        // The guard is scoped to this block, so it is released before the `.await` below.
        let segments_lock = self.segments().read();

        tokio::time::timeout(
            timeout,
            try_join_all(
                segments_lock
                    .non_appendable_then_appendable_segments()
                    .map(spawn_read),
            ),
        )
    }
    .await
    .map_err(|_: Elapsed| CollectionError::timeout(timeout.as_secs() as usize, "facet"))??;

    // Fold every per-segment map into the first one, summing counts of equal values.
    // `merged_hits` is `None` when there were no segments at all.
    let merged_hits = process_results(all_reads, |reads| {
        reads.reduce(|mut acc, map| {
            for (value, count) in map {
                *acc.entry(value).or_insert(0) += count;
            }
            acc
        })
    })?;

    // We can't just select top values, because we need to aggregate across segments,
    // which we can't assume to select the same best top.
    //
    // We need all values to be able to aggregate correctly across segments
    let top_hits = merged_hits
        .map(|map| {
            // The merged map is owned and not needed afterwards: move the values out
            // instead of cloning each one.
            map.into_iter()
                .map(|(value, count)| FacetValueHit { value, count })
                .collect_vec()
        })
        .unwrap_or_default();

    Ok(top_hits)
}
/// Returns values with exact counts for a given facet request.
pub async fn exact_facet(
&self,
request: Arc<FacetParams>,
search_runtime_handle: &Handle,
timeout: Option<Duration>,
) -> CollectionResult<Vec<FacetValueHit>> {
// To return exact counts we need to consider that the same point can be in different segments if it has different versions.
// So, we need to consider all point ids for a given filter in all segments to do an accurate count.
//
// To do this we will perform exact counts for each of the values in the field.
let timeout = timeout.unwrap_or(self.shared_storage_config.search_timeout);
let instant = std::time::Instant::now();
// Get unique values for the field
let unique_values = self
.unique_values(Arc::clone(&request), search_runtime_handle, timeout)
.await?;
// Make an exact count for each value
let hits_futures = unique_values.into_iter().map(|value| {
let match_value = Filter::new_must(Condition::Field(FieldCondition::new_match(
request.key.clone(),
Match::new_value(From::from(value.clone())),
)));
let filter = Filter::merge_opts(request.filter.clone(), Some(match_value));
async move {
let count = self
.read_filtered(filter.as_ref(), search_runtime_handle)
.await?
.len();
CollectionResult::Ok(FacetValueHit { value, count })
}
});
let hits = tokio::time::timeout(
timeout.saturating_sub(instant.elapsed()),
future::try_join_all(hits_futures),
)
.await
.map_err(|_: Elapsed| CollectionError::timeout(timeout.as_secs() as usize, "facet"))??;
Ok(hits)
}
/// Collects the distinct values of `request.key` across all segments.
///
/// Each segment is queried with `request.filter` on a blocking task spawned on `handle`; the
/// per-segment results are merged into one ordered set.
///
/// # Errors
///
/// Returns a timeout error if the reads do not finish within `timeout`, and propagates a
/// failed blocking task or a failed segment read.
async fn unique_values(
    &self,
    request: Arc<FacetParams>,
    handle: &Handle,
    timeout: Duration,
) -> CollectionResult<BTreeSet<FacetValue>> {
    // The flag obtained from this guard is handed to every segment read.
    let stopping_guard = StoppingGuard::new();

    let pending_reads = {
        // The guard only lives inside this block, so it is released before awaiting.
        let segments_guard = self.segments().read();

        let tasks = segments_guard
            .non_appendable_then_appendable_segments()
            .map(|segment: LockedSegment| {
                let request = Arc::clone(&request);
                let is_stopped = stopping_guard.get_is_stopped();

                handle.spawn_blocking(move || {
                    let segment_handle = segment.get();
                    let segment_guard = segment_handle.read();
                    segment_guard.unique_values(
                        &request.key,
                        request.filter.as_ref(),
                        &is_stopped,
                    )
                })
            });

        tokio::time::timeout(timeout, try_join_all(tasks))
    };

    let per_segment = pending_reads
        .await
        .map_err(|_: Elapsed| CollectionError::timeout(timeout.as_secs() as usize, "facet"))??;

    // Stop at the first failed segment read; otherwise pour every segment's values into the set.
    let merged = process_results(per_segment, |reads| {
        let mut values = BTreeSet::new();
        for segment_values in reads {
            values.extend(segment_values);
        }
        values
    })?;

    Ok(merged)
}
}
|