Spaces:

reztilop
/

colibri.qdrant

Build error

File size: 11,009 Bytes

84d2a97

use std::collections::HashSet;
use std::path::Path;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use common::counter::hardware_accumulator::HwMeasurementAcc;
use common::tar_ext;
use common::types::TelemetryDetail;
use segment::data_types::facets::{FacetParams, FacetResponse};
use segment::data_types::order_by::OrderBy;
use segment::types::{
    ExtendedPointId, Filter, PointIdType, ScoredPoint, SnapshotFormat, WithPayload,
    WithPayloadInterface, WithVector,
};
use tokio::runtime::Handle;
use tokio::sync::{oneshot, RwLock};
use tokio::time::timeout;

use super::update_tracker::UpdateTracker;
use crate::operations::operation_effect::{
    EstimateOperationEffectArea, OperationEffectArea, PointsOperationEffect,
};
use crate::operations::types::{
    CollectionError, CollectionInfo, CollectionResult, CoreSearchRequestBatch,
    CountRequestInternal, CountResult, PointRequestInternal, RecordInternal, UpdateResult,
};
use crate::operations::universal_query::shard_query::{ShardQueryRequest, ShardQueryResponse};
use crate::operations::OperationWithClockTag;
use crate::shards::local_shard::LocalShard;
use crate::shards::shard_trait::ShardOperation;
use crate::shards::telemetry::LocalShardTelemetry;
use crate::update_handler::UpdateSignal;

/// Shared set of point IDs, guarded by an async (`tokio`) read-write lock.
type ChangedPointsSet = Arc<RwLock<HashSet<PointIdType>>>;

/// ProxyShard
///
/// ProxyShard is a wrapper type for a LocalShard.
///
/// It can be used to provide all read and write operations while the wrapped shard is being transferred to another node.
/// It keeps track of changed points during the shard transfer to ensure consistency.
pub struct ProxyShard {
    /// The local shard every operation is forwarded to.
    wrapped_shard: LocalShard,
    /// IDs of the points recorded by `update` as (potentially) changed.
    /// The write lock of this set is also held while an update is applied to the wrapped
    /// shard and while `reinit_changelog` runs.
    changed_points: ChangedPointsSet,
    /// Set to `true` by `update` when a filter-based operation is estimated to affect more
    /// than `MAX_CHANGES_TRACKED_COUNT` points, so its points are not recorded individually.
    /// Reset to `false` by `reinit_changelog`.
    pub changed_alot: AtomicBool,
}

/// Max number of updates tracked to synchronize after the transfer.
///
/// `update` compares the estimated upper bound of points matched by a filter-based operation
/// against this value; above it the points are not collected and only `changed_alot` is set.
const MAX_CHANGES_TRACKED_COUNT: usize = 10_000;

/// Base unit of time to wait for the update queue to become empty.
/// `reinit_changelog` multiplies it by `2^attempt`, so the wait grows with every retry.
/// We don't want false positives here, so the overall wait should be large.
/// If the queue is stuck, it means something is wrong with the application logic.
const UPDATE_QUEUE_CLEAR_TIMEOUT: Duration = Duration::from_secs(1);
/// Threshold for giving up: once an attempt whose timeout exceeds this value has timed out,
/// `reinit_changelog` returns an error instead of retrying.
const UPDATE_QUEUE_CLEAR_MAX_TIMEOUT: Duration = Duration::from_secs(128);

impl ProxyShard {
    #[allow(unused)]
    pub async fn new(wrapped_shard: LocalShard) -> Self {
        let res = Self {
            wrapped_shard,
            changed_points: Default::default(),
            changed_alot: Default::default(),
        };
        res.reinit_changelog().await;
        res
    }

    /// Forward `create_snapshot` to `wrapped_shard`
    pub async fn create_snapshot(
        &self,
        temp_path: &Path,
        tar: &tar_ext::BuilderExt,
        format: SnapshotFormat,
        save_wal: bool,
    ) -> CollectionResult<()> {
        self.wrapped_shard
            .create_snapshot(temp_path, tar, format, save_wal)
            .await
    }

    pub async fn on_optimizer_config_update(&self) -> CollectionResult<()> {
        self.wrapped_shard.on_optimizer_config_update().await
    }

    pub fn trigger_optimizers(&self) {
        // TODO: we might want to defer this trigger until we unproxy
        self.wrapped_shard.trigger_optimizers();
    }

    pub async fn reinit_changelog(&self) -> CollectionResult<()> {
        // Blocks updates in the wrapped shard.
        let mut changed_points_guard = self.changed_points.write().await;
        // Clear the update queue
        let mut attempt = 1;
        loop {
            let (tx, rx) = oneshot::channel();
            let plunger = UpdateSignal::Plunger(tx);
            self.wrapped_shard
                .update_sender
                .load()
                .send(plunger)
                .await?;
            let attempt_timeout = UPDATE_QUEUE_CLEAR_TIMEOUT * (2_u32).pow(attempt);
            // It is possible, that the queue is recreated while we are waiting for plunger.
            // So we will timeout and try again
            if timeout(attempt_timeout, rx).await.is_err() {
                log::warn!("Timeout {} while waiting for the wrapped shard to finish the update queue, retrying", attempt_timeout.as_secs());
                attempt += 1;
                if attempt_timeout > UPDATE_QUEUE_CLEAR_MAX_TIMEOUT {
                    return Err(CollectionError::service_error(
                        "Timeout while waiting for the wrapped shard to finish the update queue"
                            .to_string(),
                    ));
                }
                continue;
            }
            break;
        }
        // Update queue is clear now
        // Clear the changed_points set
        changed_points_guard.clear();

        // Clear changed_alot flag
        self.changed_alot
            .store(false, std::sync::atomic::Ordering::Relaxed);
        Ok(())
    }

    pub fn get_telemetry_data(&self, detail: TelemetryDetail) -> LocalShardTelemetry {
        self.wrapped_shard.get_telemetry_data(detail)
    }

    pub fn update_tracker(&self) -> &UpdateTracker {
        self.wrapped_shard.update_tracker()
    }
}

#[async_trait]
impl ShardOperation for ProxyShard {
    /// Apply `operation` to `wrapped_shard`, recording the points it may change beforehand.
    ///
    /// Explicitly listed points and points matched by a small enough filter are added to
    /// `changed_points`; a filter estimated to match more than `MAX_CHANGES_TRACKED_COUNT`
    /// points only raises the `changed_alot` flag.
    ///
    /// # Cancel safety
    ///
    /// This method is *not* cancel safe.
    async fn update(
        &self,
        operation: OperationWithClockTag,
        wait: bool,
    ) -> CollectionResult<UpdateResult> {
        // Once `self.changed_points` has been modified, the wrapped shard update *has to* (?)
        // be driven to completion, which is why this method is not cancel safe.

        let shard = &self.wrapped_shard;

        let effect: PointsOperationEffect = match operation.operation.estimate_effect_area() {
            OperationEffectArea::Empty => PointsOperationEffect::Empty,
            OperationEffectArea::Points(ids) => PointsOperationEffect::Some(ids),
            OperationEffectArea::Filter(filter) => {
                // Check the estimated size of the change set before actually reading it.
                let cardinality = shard.estimate_cardinality(Some(&filter))?;
                if cardinality.max <= MAX_CHANGES_TRACKED_COUNT {
                    let search_runtime = shard.search_runtime.clone();
                    let matched = shard
                        .read_filtered(Some(&filter), &search_runtime)
                        .await?;
                    PointsOperationEffect::Some(matched.into_iter().collect())
                } else {
                    PointsOperationEffect::Many
                }
            }
        };

        // The write lock stays held until the wrapped shard update below has finished: this
        // is the mechanism that blocks shard updates during transfer restart and finalization.
        let mut tracked_points = self.changed_points.write().await;

        match effect {
            PointsOperationEffect::Empty => {}
            // Individually tracked points never raise `changed_alot`.
            PointsOperationEffect::Some(ids) => tracked_points.extend(ids),
            PointsOperationEffect::Many => self
                .changed_alot
                .store(true, std::sync::atomic::Ordering::Relaxed),
        }

        shard.update(operation, wait).await
    }

    /// Read-only: delegates `scroll_by` to `wrapped_shard` unchanged.
    async fn scroll_by(
        &self,
        offset: Option<ExtendedPointId>,
        limit: usize,
        with_payload_interface: &WithPayloadInterface,
        with_vector: &WithVector,
        filter: Option<&Filter>,
        search_runtime_handle: &Handle,
        order_by: Option<&OrderBy>,
        timeout: Option<Duration>,
    ) -> CollectionResult<Vec<RecordInternal>> {
        self.wrapped_shard
            .scroll_by(
                offset,
                limit,
                with_payload_interface,
                with_vector,
                filter,
                search_runtime_handle,
                order_by,
                timeout,
            )
            .await
    }

    /// Read-only: delegates `info` to `wrapped_shard` unchanged.
    async fn info(&self) -> CollectionResult<CollectionInfo> {
        self.wrapped_shard.info().await
    }

    /// Read-only: delegates `core_search` to `wrapped_shard` unchanged.
    async fn core_search(
        &self,
        request: Arc<CoreSearchRequestBatch>,
        search_runtime_handle: &Handle,
        timeout: Option<Duration>,
        hw_measurement_acc: &HwMeasurementAcc,
    ) -> CollectionResult<Vec<Vec<ScoredPoint>>> {
        self.wrapped_shard
            .core_search(request, search_runtime_handle, timeout, hw_measurement_acc)
            .await
    }

    /// Read-only: delegates `count` to `wrapped_shard` unchanged.
    async fn count(
        &self,
        request: Arc<CountRequestInternal>,
        search_runtime_handle: &Handle,
        timeout: Option<Duration>,
        hw_measurement_acc: &HwMeasurementAcc,
    ) -> CollectionResult<CountResult> {
        self.wrapped_shard
            .count(request, search_runtime_handle, timeout, hw_measurement_acc)
            .await
    }

    /// Read-only: delegates `retrieve` to `wrapped_shard` unchanged.
    async fn retrieve(
        &self,
        request: Arc<PointRequestInternal>,
        with_payload: &WithPayload,
        with_vector: &WithVector,
        search_runtime_handle: &Handle,
        timeout: Option<Duration>,
    ) -> CollectionResult<Vec<RecordInternal>> {
        self.wrapped_shard
            .retrieve(
                request,
                with_payload,
                with_vector,
                search_runtime_handle,
                timeout,
            )
            .await
    }

    /// Read-only: delegates `query_batch` to `wrapped_shard` unchanged.
    async fn query_batch(
        &self,
        request: Arc<Vec<ShardQueryRequest>>,
        search_runtime_handle: &Handle,
        timeout: Option<Duration>,
        hw_measurement_acc: &HwMeasurementAcc,
    ) -> CollectionResult<Vec<ShardQueryResponse>> {
        self.wrapped_shard
            .query_batch(request, search_runtime_handle, timeout, hw_measurement_acc)
            .await
    }

    /// Read-only: delegates `facet` to `wrapped_shard` unchanged.
    async fn facet(
        &self,
        request: Arc<FacetParams>,
        search_runtime_handle: &Handle,
        timeout: Option<Duration>,
    ) -> CollectionResult<FacetResponse> {
        self.wrapped_shard
            .facet(request, search_runtime_handle, timeout)
            .await
    }
}