use std::time::Duration;
use async_trait::async_trait;
use common::defaults::{self, CONSENSUS_CONFIRM_RETRIES};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokio::time::sleep;
use super::channel_service::ChannelService;
use super::remote_shard::RemoteShard;
use super::replica_set::ReplicaState;
use super::resharding::ReshardKey;
use super::shard::{PeerId, ShardId};
use super::CollectionId;
use crate::operations::types::{CollectionError, CollectionResult};
pub mod driver;
pub mod helpers;
pub mod resharding_stream_records;
pub mod snapshot;
pub mod stream_records;
pub mod transfer_tasks_pool;
pub mod wal_delta;
/// Time between consensus confirmation retries.
const CONSENSUS_CONFIRM_RETRY_DELAY: Duration = Duration::from_secs(1);
/// Time after which confirming a consensus operation times out.
const CONSENSUS_CONFIRM_TIMEOUT: Duration = defaults::CONSENSUS_META_OP_WAIT;
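// Together, these bound each `*_confirm_and_retry` helper below: up to
// `CONSENSUS_CONFIRM_RETRIES` attempts, spaced `CONSENSUS_CONFIRM_RETRY_DELAY` apart, with
// remote confirmation (where used) capped at `CONSENSUS_CONFIRM_TIMEOUT`.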
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ShardTransfer {
pub shard_id: ShardId,
    /// For resharding, a different target shard ID may be configured.
    /// By default, the shard ID on the target peer is the same as the source shard ID.
#[serde(default, skip_serializing_if = "Option::is_none")]
#[schemars(skip)] // TODO(resharding): expose once we release resharding
pub to_shard_id: Option<ShardId>,
pub from: PeerId,
pub to: PeerId,
    /// If this flag is true, this is a replication-related transfer of a shard from one peer
    /// to another. The shard on the original peer will not be deleted in this case.
pub sync: bool,
/// Method to transfer shard with. `None` to choose automatically.
#[serde(default)]
pub method: Option<ShardTransferMethod>,
}
impl ShardTransfer {
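    /// Get the unique identifier of this transfer.
    ///
    /// The key only covers the routing fields (`shard_id`, `to_shard_id`, `from`, `to`); the
    /// `sync` and `method` fields are not part of the identity.
    ///
    /// # Example
    ///
    /// A minimal sketch: two transfers that differ only in `method` share the same key.
    ///
    /// ```ignore
    /// let a = ShardTransfer {
    ///     shard_id: 1,
    ///     to_shard_id: None,
    ///     from: 100,
    ///     to: 200,
    ///     sync: false,
    ///     method: Some(ShardTransferMethod::StreamRecords),
    /// };
    /// let mut b = a.clone();
    /// b.method = Some(ShardTransferMethod::WalDelta);
    /// assert_eq!(a.key(), b.key());
    /// ```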
pub fn key(&self) -> ShardTransferKey {
ShardTransferKey {
shard_id: self.shard_id,
to_shard_id: self.to_shard_id,
from: self.from,
to: self.to,
}
}
}
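/// Restart request for a shard transfer, with an explicitly given transfer method.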
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ShardTransferRestart {
pub shard_id: ShardId,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[schemars(skip)] // TODO(resharding): expose once we release resharding
pub to_shard_id: Option<ShardId>,
pub from: PeerId,
pub to: PeerId,
pub method: ShardTransferMethod,
}
impl ShardTransferRestart {
pub fn key(&self) -> ShardTransferKey {
ShardTransferKey {
shard_id: self.shard_id,
to_shard_id: self.to_shard_id,
from: self.from,
to: self.to,
}
}
}
impl From<ShardTransfer> for ShardTransferRestart {
fn from(transfer: ShardTransfer) -> Self {
Self {
shard_id: transfer.shard_id,
to_shard_id: transfer.to_shard_id,
from: transfer.from,
to: transfer.to,
method: transfer.method.unwrap_or_default(),
}
}
}
/// Unique identifier of a transfer, agnostic of transfer method
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ShardTransferKey {
pub shard_id: ShardId,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[schemars(skip)] // TODO(resharding): expose once we release resharding
pub to_shard_id: Option<ShardId>,
pub from: PeerId,
pub to: PeerId,
}
impl ShardTransferKey {
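    /// Check whether this key identifies the given transfer.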
pub fn check(self, transfer: &ShardTransfer) -> bool {
self == transfer.key()
}
}
/// Methods for transferring a shard from one node to another.
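///
/// # Example
///
/// A minimal sketch (assuming `serde_json` is available) of the wire format produced by
/// `#[serde(rename_all = "snake_case")]`:
///
/// ```ignore
/// let json = serde_json::to_string(&ShardTransferMethod::WalDelta).unwrap();
/// assert_eq!(json, "\"wal_delta\"");
/// ```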
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Default, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ShardTransferMethod {
/// Stream all shard records in batches until the whole shard is transferred.
#[default]
StreamRecords,
/// Snapshot the shard, transfer and restore it on the receiver.
Snapshot,
/// Attempt to transfer shard difference by WAL delta.
WalDelta,
/// Shard transfer for resharding: stream all records in batches until all points are
/// transferred.
#[schemars(skip)]
ReshardingStreamRecords,
}
impl ShardTransferMethod {
pub fn is_resharding(&self) -> bool {
matches!(self, Self::ReshardingStreamRecords)
}
}
/// Interface to consensus for shard transfer operations.
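///
/// Required methods only submit proposals to consensus; the provided `*_confirm_and_retry`
/// helpers wrap them in a bounded retry loop.
///
/// # Example
///
/// A minimal sketch of driving a transfer through a `ShardTransferConsensus` handle
/// (`consensus` and `transfer` are hypothetical values in scope):
///
/// ```ignore
/// // Propose the transfer and retry until consensus picks it up or retries are exhausted.
/// consensus
///     .start_shard_transfer_confirm_and_retry(&transfer, "my_collection")
///     .await?;
/// ```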
#[async_trait]
pub trait ShardTransferConsensus: Send + Sync {
/// Get the peer ID for the current node.
fn this_peer_id(&self) -> PeerId;
/// Get all peer IDs, including that of the current node.
fn peers(&self) -> Vec<PeerId>;
/// Get the current consensus commit and term state.
///
/// Returns `(commit, term)`.
fn consensus_commit_term(&self) -> (u64, u64);
/// After snapshot or WAL delta recovery, propose to switch shard to `Partial`
///
/// This is called after shard snapshot or WAL delta recovery has been completed on the remote.
/// It submits a proposal to consensus to switch the shard state from `Recovery` to `Partial`.
///
/// # Warning
///
/// This only submits a proposal to consensus. Calling this does not guarantee that consensus
/// will actually apply the operation across the cluster.
fn recovered_switch_to_partial(
&self,
transfer_config: &ShardTransfer,
collection_id: CollectionId,
) -> CollectionResult<()>;
/// After snapshot or WAL delta recovery, propose to switch shard to `Partial` and confirm on
/// remote shard
///
/// This is called after shard snapshot or WAL delta recovery has been completed on the remote.
/// It submits a proposal to consensus to switch the shard state from `Recovery` to `Partial`.
///
    /// This method also confirms that consensus applied the operation before returning, by
    /// asserting the change is propagated on the remote shard. For the next stage, only the
    /// remote needs to be in `Partial` to accept updates; we therefore assert the state on the
    /// remote explicitly rather than asserting locally. If confirmation fails, it is retried up
    /// to `CONSENSUS_CONFIRM_RETRIES` times.
///
/// # Cancel safety
///
/// This method is cancel safe.
async fn recovered_switch_to_partial_confirm_remote(
&self,
transfer_config: &ShardTransfer,
collection_id: &CollectionId,
remote_shard: &RemoteShard,
) -> CollectionResult<()> {
let mut result = Err(CollectionError::service_error(
"`recovered_switch_to_partial_confirm_remote` exit without attempting any work, \
this is a programming error",
));
for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
if attempt > 0 {
sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
}
result = self.recovered_switch_to_partial(transfer_config, collection_id.to_string());
if let Err(err) = &result {
log::error!("Failed to propose recovered operation to consensus: {err}");
continue;
}
log::trace!("Wait for remote shard to reach `Partial` state");
result = remote_shard
.wait_for_shard_state(
collection_id,
transfer_config.shard_id,
ReplicaState::Partial,
CONSENSUS_CONFIRM_TIMEOUT,
)
.await
.map(|_| ());
match &result {
Ok(()) => break,
Err(err) => {
log::error!("Failed to confirm recovered operation on consensus: {err}");
continue;
}
}
}
result.map_err(|err| {
CollectionError::service_error(format!(
"Failed to confirm recovered operation on consensus after {CONSENSUS_CONFIRM_RETRIES} retries: {err}",
))
})
}
/// Propose to start a shard transfer
///
/// # Warning
///
/// This only submits a proposal to consensus. Calling this does not guarantee that consensus
/// will actually apply the operation across the cluster.
async fn start_shard_transfer(
&self,
transfer_config: ShardTransfer,
collection_id: CollectionId,
) -> CollectionResult<()>;
    /// Propose to start a shard transfer, then confirm or retry.
///
/// This internally confirms and retries a few times if needed to ensure consensus picks up the
/// operation.
async fn start_shard_transfer_confirm_and_retry(
&self,
transfer_config: &ShardTransfer,
collection_id: &str,
) -> CollectionResult<()> {
let mut result = Err(CollectionError::service_error(
"`start_shard_transfer_confirm_and_retry` exit without attempting any work, \
this is a programming error",
));
for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
if attempt > 0 {
sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
}
log::trace!("Propose and confirm shard transfer start operation");
result = self
.start_shard_transfer(transfer_config.clone(), collection_id.into())
.await;
match &result {
Ok(()) => break,
Err(err) => {
log::error!(
"Failed to confirm start shard transfer operation on consensus: {err}"
);
continue;
}
}
}
result.map_err(|err| {
CollectionError::service_error(format!(
"Failed to start shard transfer through consensus \
after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
))
})
}
    /// Propose to restart a shard transfer with a different configuration
///
/// # Warning
///
/// This only submits a proposal to consensus. Calling this does not guarantee that consensus
/// will actually apply the operation across the cluster.
async fn restart_shard_transfer(
&self,
transfer_config: ShardTransfer,
collection_id: CollectionId,
) -> CollectionResult<()>;
    /// Propose to restart a shard transfer with a different configuration, then confirm or retry.
///
/// This internally confirms and retries a few times if needed to ensure consensus picks up the
/// operation.
async fn restart_shard_transfer_confirm_and_retry(
&self,
transfer_config: &ShardTransfer,
collection_id: &CollectionId,
) -> CollectionResult<()> {
let mut result = Err(CollectionError::service_error(
"`restart_shard_transfer_confirm_and_retry` exit without attempting any work, \
this is a programming error",
));
for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
if attempt > 0 {
sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
}
log::trace!("Propose and confirm shard transfer restart operation");
result = self
.restart_shard_transfer(transfer_config.clone(), collection_id.into())
.await;
match &result {
Ok(()) => break,
Err(err) => {
log::error!(
"Failed to confirm restart shard transfer operation on consensus: {err}"
);
continue;
}
}
}
result.map_err(|err| {
CollectionError::service_error(format!(
"Failed to restart shard transfer through consensus \
after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
))
})
}
/// Propose to abort a shard transfer
///
/// # Warning
///
/// This only submits a proposal to consensus. Calling this does not guarantee that consensus
/// will actually apply the operation across the cluster.
async fn abort_shard_transfer(
&self,
transfer: ShardTransferKey,
collection_id: CollectionId,
reason: &str,
) -> CollectionResult<()>;
    /// Propose to abort a shard transfer, then confirm or retry.
///
/// This internally confirms and retries a few times if needed to ensure consensus picks up the
/// operation.
async fn abort_shard_transfer_confirm_and_retry(
&self,
transfer: ShardTransferKey,
collection_id: &CollectionId,
reason: &str,
) -> CollectionResult<()> {
let mut result = Err(CollectionError::service_error(
"`abort_shard_transfer_confirm_and_retry` exit without attempting any work, \
this is a programming error",
));
for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
if attempt > 0 {
sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
}
log::trace!("Propose and confirm shard transfer abort operation");
result = self
.abort_shard_transfer(transfer, collection_id.into(), reason)
.await;
match &result {
Ok(()) => break,
Err(err) => {
log::error!(
"Failed to confirm abort shard transfer operation on consensus: {err}"
);
continue;
}
}
}
result.map_err(|err| {
CollectionError::service_error(format!(
"Failed to abort shard transfer through consensus \
after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
))
})
}
    /// Set the shard replica set state on this peer through consensus
///
/// # Warning
///
/// This only submits a proposal to consensus. Calling this does not guarantee that consensus
/// will actually apply the operation across the cluster.
async fn set_shard_replica_set_state(
&self,
collection_id: CollectionId,
shard_id: ShardId,
state: ReplicaState,
from_state: Option<ReplicaState>,
) -> CollectionResult<()>;
    /// Propose to set the shard replica set state on this peer through consensus, then confirm
    /// or retry.
///
/// This internally confirms and retries a few times if needed to ensure consensus picks up the
/// operation.
async fn set_shard_replica_set_state_confirm_and_retry(
&self,
collection_id: &CollectionId,
shard_id: ShardId,
state: ReplicaState,
from_state: Option<ReplicaState>,
) -> CollectionResult<()> {
let mut result = Err(CollectionError::service_error(
"`set_shard_replica_set_state_confirm_and_retry` exit without attempting any work, \
this is a programming error",
));
for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
if attempt > 0 {
sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
}
log::trace!("Propose and confirm set shard replica set state");
result = self
.set_shard_replica_set_state(collection_id.clone(), shard_id, state, from_state)
.await;
match &result {
Ok(()) => break,
Err(err) => {
log::error!(
"Failed to confirm set shard replica set state operation on consensus: {err}"
);
continue;
}
}
}
result.map_err(|err| {
CollectionError::service_error(format!(
"Failed to set shard replica set state through consensus \
after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
))
})
}
/// Propose to commit the read hash ring.
///
/// # Warning
///
/// This only submits a proposal to consensus. Calling this does not guarantee that consensus
/// will actually apply the operation across the cluster.
async fn commit_read_hashring(
&self,
collection_id: CollectionId,
reshard_key: ReshardKey,
) -> CollectionResult<()>;
/// Propose to commit the read hash ring, then confirm or retry.
///
/// This internally confirms and retries a few times if needed to ensure consensus picks up the
/// operation.
async fn commit_read_hashring_confirm_and_retry(
&self,
collection_id: &CollectionId,
reshard_key: &ReshardKey,
) -> CollectionResult<()> {
let mut result = Err(CollectionError::service_error(
"`commit_read_hashring_confirm_and_retry` exit without attempting any work, \
this is a programming error",
));
for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
if attempt > 0 {
sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
}
log::trace!("Propose and confirm commit read hashring operation");
result = self
.commit_read_hashring(collection_id.into(), reshard_key.clone())
.await;
match &result {
Ok(()) => break,
Err(err) => {
log::error!(
"Failed to confirm commit read hashring operation on consensus: {err}"
);
continue;
}
}
}
result.map_err(|err| {
CollectionError::service_error(format!(
"Failed to commit read hashring through consensus \
after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
))
})
}
/// Propose to commit the write hash ring.
///
/// # Warning
///
/// This only submits a proposal to consensus. Calling this does not guarantee that consensus
/// will actually apply the operation across the cluster.
async fn commit_write_hashring(
&self,
collection_id: CollectionId,
reshard_key: ReshardKey,
) -> CollectionResult<()>;
/// Propose to commit the write hash ring, then confirm or retry.
///
/// This internally confirms and retries a few times if needed to ensure consensus picks up the
/// operation.
async fn commit_write_hashring_confirm_and_retry(
&self,
collection_id: &CollectionId,
reshard_key: &ReshardKey,
) -> CollectionResult<()> {
let mut result = Err(CollectionError::service_error(
"`commit_write_hashring_confirm_and_retry` exit without attempting any work, \
this is a programming error",
));
for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
if attempt > 0 {
sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
}
log::trace!("Propose and confirm commit write hashring operation");
result = self
.commit_write_hashring(collection_id.into(), reshard_key.clone())
.await;
match &result {
Ok(()) => break,
Err(err) => {
log::error!(
"Failed to confirm commit write hashring operation on consensus: {err}"
);
continue;
}
}
}
result.map_err(|err| {
CollectionError::service_error(format!(
"Failed to commit write hashring through consensus \
after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
))
})
}
/// Wait for all other peers to reach the current consensus
///
    /// This takes the current consensus state of this node, then explicitly waits for all other
    /// nodes to reach the same commit and term.
///
/// # Errors
///
/// This errors if:
/// - any of the peers is not on the same term
/// - waiting takes longer than the specified timeout
/// - any of the peers cannot be reached
///
/// # Cancel safety
///
/// This method is cancel safe.
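    ///
    /// # Example
    ///
    /// A minimal sketch (`consensus` and `channel_service` are hypothetical values in scope):
    ///
    /// ```ignore
    /// // Block until all other peers have caught up to our current commit and term.
    /// consensus.await_consensus_sync(&channel_service).await?;
    /// ```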
async fn await_consensus_sync(&self, channel_service: &ChannelService) -> CollectionResult<()> {
let other_peer_count = channel_service.id_to_address.read().len().saturating_sub(1);
if other_peer_count == 0 {
log::warn!("There are no other peers, skipped synchronizing consensus");
return Ok(());
}
let (commit, term) = self.consensus_commit_term();
log::trace!(
"Waiting on {other_peer_count} peer(s) to reach consensus (commit: {commit}, term: {term}) before finalizing shard transfer"
);
channel_service
.await_commit_on_all_peers(
self.this_peer_id(),
commit,
term,
defaults::CONSENSUS_META_OP_WAIT,
)
.await
}
}