use std::time::Duration;

use async_trait::async_trait;
use common::defaults::{self, CONSENSUS_CONFIRM_RETRIES};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokio::time::sleep;

use super::channel_service::ChannelService;
use super::remote_shard::RemoteShard;
use super::replica_set::ReplicaState;
use super::resharding::ReshardKey;
use super::shard::{PeerId, ShardId};
use super::CollectionId;
use crate::operations::types::{CollectionError, CollectionResult};

pub mod driver;
pub mod helpers;
pub mod resharding_stream_records;
pub mod snapshot;
pub mod stream_records;
pub mod transfer_tasks_pool;
pub mod wal_delta;

/// Time between consensus confirmation retries.
const CONSENSUS_CONFIRM_RETRY_DELAY: Duration = Duration::from_secs(1);

/// Time after which confirming a consensus operation times out.
const CONSENSUS_CONFIRM_TIMEOUT: Duration = defaults::CONSENSUS_META_OP_WAIT;
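
// Note on timing: the retry loops below make up to `CONSENSUS_CONFIRM_RETRIES`
// attempts (the count comes from `common::defaults`), sleeping
// `CONSENSUS_CONFIRM_RETRY_DELAY` between attempts. Where a remote confirmation
// is involved, each attempt may additionally wait up to
// `CONSENSUS_CONFIRM_TIMEOUT`, so the worst case is roughly
// `CONSENSUS_CONFIRM_RETRIES * (CONSENSUS_CONFIRM_TIMEOUT + CONSENSUS_CONFIRM_RETRY_DELAY)`.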

#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash)]
pub struct ShardTransfer {
    pub shard_id: ShardId,
    /// For resharding, a different target shard ID may be configured.
    /// By default the shard ID on the target peer is the same.
    // TODO(resharding): expose once we release resharding
    pub to_shard_id: Option<ShardId>,
    pub from: PeerId,
    pub to: PeerId,
    /// If `true`, this is a replication-related transfer of a shard from one peer to another.
    /// The shard on the original peer will not be deleted in this case.
    pub sync: bool,
    /// Method to transfer shard with. `None` to choose automatically.
    pub method: Option<ShardTransferMethod>,
}

impl ShardTransfer {
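    /// Derive the [`ShardTransferKey`] that uniquely identifies this transfer.
    ///
    /// A minimal sketch (illustrative field values only):
    ///
    /// ```ignore
    /// let transfer = ShardTransfer {
    ///     shard_id: 0,
    ///     to_shard_id: None,
    ///     from: 1,
    ///     to: 2,
    ///     sync: false,
    ///     method: None,
    /// };
    /// assert!(transfer.key().check(&transfer));
    /// ```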
    pub fn key(&self) -> ShardTransferKey {
        ShardTransferKey {
            shard_id: self.shard_id,
            to_shard_id: self.to_shard_id,
            from: self.from,
            to: self.to,
        }
    }
}

pub struct ShardTransferRestart {
    pub shard_id: ShardId,
    // TODO(resharding): expose once we release resharding
    pub to_shard_id: Option<ShardId>,
    pub from: PeerId,
    pub to: PeerId,
    pub method: ShardTransferMethod,
}

impl ShardTransferRestart {
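    /// Derive the [`ShardTransferKey`] identifying the transfer to restart.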
    pub fn key(&self) -> ShardTransferKey {
        ShardTransferKey {
            shard_id: self.shard_id,
            to_shard_id: self.to_shard_id,
            from: self.from,
            to: self.to,
        }
    }
}
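
/// Convert a transfer into a restart request for it, falling back to the
/// default transfer method if none was configured.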
impl From<ShardTransfer> for ShardTransferRestart {
    fn from(transfer: ShardTransfer) -> Self {
        Self {
            shard_id: transfer.shard_id,
            to_shard_id: transfer.to_shard_id,
            from: transfer.from,
            to: transfer.to,
            method: transfer.method.unwrap_or_default(),
        }
    }
}

/// Unique identifier of a transfer, agnostic of transfer method
#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash)]
pub struct ShardTransferKey {
    pub shard_id: ShardId,
    // TODO(resharding): expose once we release resharding
    pub to_shard_id: Option<ShardId>,
    pub from: PeerId,
    pub to: PeerId,
}

impl ShardTransferKey {
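    /// Check whether this key identifies the given transfer.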
    pub fn check(self, transfer: &ShardTransfer) -> bool {
        self == transfer.key()
    }
}

/// Methods for transferring a shard from one node to another.
#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ShardTransferMethod {
    /// Stream all shard records in batches until the whole shard is transferred.
    #[default]
    StreamRecords,
    /// Snapshot the shard, transfer and restore it on the receiver.
    Snapshot,
    /// Attempt to transfer shard difference by WAL delta.
    WalDelta,
    /// Shard transfer for resharding: stream all records in batches until all points are
    /// transferred.
    ReshardingStreamRecords,
}

impl ShardTransferMethod {
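    /// Whether this method performs a resharding transfer rather than a regular one.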
    pub fn is_resharding(&self) -> bool {
        matches!(self, Self::ReshardingStreamRecords)
    }
}

/// Interface to consensus for shard transfer operations.
#[async_trait]
pub trait ShardTransferConsensus: Send + Sync {
    /// Get the peer ID for the current node.
    fn this_peer_id(&self) -> PeerId;

    /// Get all peer IDs, including that of the current node.
    fn peers(&self) -> Vec<PeerId>;

    /// Get the current consensus commit and term state.
    ///
    /// Returns `(commit, term)`.
    fn consensus_commit_term(&self) -> (u64, u64);

    /// After snapshot or WAL delta recovery, propose to switch shard to `Partial`
    ///
    /// This is called after shard snapshot or WAL delta recovery has been completed on the remote.
    /// It submits a proposal to consensus to switch the shard state from `Recovery` to `Partial`.
    ///
    /// # Warning
    ///
    /// This only submits a proposal to consensus. Calling this does not guarantee that consensus
    /// will actually apply the operation across the cluster.
    fn recovered_switch_to_partial(
        &self,
        transfer_config: &ShardTransfer,
        collection_id: CollectionId,
    ) -> CollectionResult<()>;

    /// After snapshot or WAL delta recovery, propose to switch shard to `Partial` and confirm on
    /// remote shard
    ///
    /// This is called after shard snapshot or WAL delta recovery has been completed on the remote.
    /// It submits a proposal to consensus to switch the shard state from `Recovery` to `Partial`.
    ///
    /// This method also confirms consensus applied the operation before returning by asserting the
    /// change is propagated on a remote shard. For the next stage only the remote needs to be in
    /// `Partial` to accept updates, so we assert the state on the remote explicitly rather than
    /// asserting it locally. If it fails, it will be retried for up to
    /// `CONSENSUS_CONFIRM_RETRIES` times.
    ///
    /// # Cancel safety
    ///
    /// This method is cancel safe.
    async fn recovered_switch_to_partial_confirm_remote(
        &self,
        transfer_config: &ShardTransfer,
        collection_id: &CollectionId,
        remote_shard: &RemoteShard,
    ) -> CollectionResult<()> {
        let mut result = Err(CollectionError::service_error(
            "`recovered_switch_to_partial_confirm_remote` exited without attempting any work, \
             this is a programming error",
        ));

        for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
            if attempt > 0 {
                sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
            }

            result = self.recovered_switch_to_partial(transfer_config, collection_id.to_string());

            if let Err(err) = &result {
                log::error!("Failed to propose recovered operation to consensus: {err}");
                continue;
            }

            log::trace!("Wait for remote shard to reach `Partial` state");

            result = remote_shard
                .wait_for_shard_state(
                    collection_id,
                    transfer_config.shard_id,
                    ReplicaState::Partial,
                    CONSENSUS_CONFIRM_TIMEOUT,
                )
                .await
                .map(|_| ());

            match &result {
                Ok(()) => break,
                Err(err) => {
                    log::error!("Failed to confirm recovered operation on consensus: {err}");
                    continue;
                }
            }
        }

        result.map_err(|err| {
            CollectionError::service_error(format!(
                "Failed to confirm recovered operation on consensus \
                 after {CONSENSUS_CONFIRM_RETRIES} retries: {err}",
            ))
        })
    }

    /// Propose to start a shard transfer
    ///
    /// # Warning
    ///
    /// This only submits a proposal to consensus. Calling this does not guarantee that consensus
    /// will actually apply the operation across the cluster.
    async fn start_shard_transfer(
        &self,
        transfer_config: ShardTransfer,
        collection_id: CollectionId,
    ) -> CollectionResult<()>;

    /// Propose to start a shard transfer, then confirm or retry.
    ///
    /// This internally confirms and retries a few times if needed to ensure consensus picks up the
    /// operation.
    async fn start_shard_transfer_confirm_and_retry(
        &self,
        transfer_config: &ShardTransfer,
        collection_id: &str,
    ) -> CollectionResult<()> {
        let mut result = Err(CollectionError::service_error(
            "`start_shard_transfer_confirm_and_retry` exited without attempting any work, \
             this is a programming error",
        ));

        for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
            if attempt > 0 {
                sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
            }

            log::trace!("Propose and confirm shard transfer start operation");
            result = self
                .start_shard_transfer(transfer_config.clone(), collection_id.into())
                .await;

            match &result {
                Ok(()) => break,
                Err(err) => {
                    log::error!(
                        "Failed to confirm start shard transfer operation on consensus: {err}"
                    );
                    continue;
                }
            }
        }

        result.map_err(|err| {
            CollectionError::service_error(format!(
                "Failed to start shard transfer through consensus \
                 after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
            ))
        })
    }

    /// Propose to restart a shard transfer with a different configuration
    ///
    /// # Warning
    ///
    /// This only submits a proposal to consensus. Calling this does not guarantee that consensus
    /// will actually apply the operation across the cluster.
    async fn restart_shard_transfer(
        &self,
        transfer_config: ShardTransfer,
        collection_id: CollectionId,
    ) -> CollectionResult<()>;

    /// Propose to restart a shard transfer with a different configuration, then confirm or retry.
    ///
    /// This internally confirms and retries a few times if needed to ensure consensus picks up the
    /// operation.
    async fn restart_shard_transfer_confirm_and_retry(
        &self,
        transfer_config: &ShardTransfer,
        collection_id: &CollectionId,
    ) -> CollectionResult<()> {
        let mut result = Err(CollectionError::service_error(
            "`restart_shard_transfer_confirm_and_retry` exited without attempting any work, \
             this is a programming error",
        ));

        for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
            if attempt > 0 {
                sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
            }

            log::trace!("Propose and confirm shard transfer restart operation");
            result = self
                .restart_shard_transfer(transfer_config.clone(), collection_id.into())
                .await;

            match &result {
                Ok(()) => break,
                Err(err) => {
                    log::error!(
                        "Failed to confirm restart shard transfer operation on consensus: {err}"
                    );
                    continue;
                }
            }
        }

        result.map_err(|err| {
            CollectionError::service_error(format!(
                "Failed to restart shard transfer through consensus \
                 after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
            ))
        })
    }

    /// Propose to abort a shard transfer
    ///
    /// # Warning
    ///
    /// This only submits a proposal to consensus. Calling this does not guarantee that consensus
    /// will actually apply the operation across the cluster.
    async fn abort_shard_transfer(
        &self,
        transfer: ShardTransferKey,
        collection_id: CollectionId,
        reason: &str,
    ) -> CollectionResult<()>;

    /// Propose to abort a shard transfer, then confirm or retry.
    ///
    /// This internally confirms and retries a few times if needed to ensure consensus picks up the
    /// operation.
    async fn abort_shard_transfer_confirm_and_retry(
        &self,
        transfer: ShardTransferKey,
        collection_id: &CollectionId,
        reason: &str,
    ) -> CollectionResult<()> {
        let mut result = Err(CollectionError::service_error(
            "`abort_shard_transfer_confirm_and_retry` exited without attempting any work, \
             this is a programming error",
        ));

        for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
            if attempt > 0 {
                sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
            }

            log::trace!("Propose and confirm shard transfer abort operation");
            result = self
                .abort_shard_transfer(transfer, collection_id.into(), reason)
                .await;

            match &result {
                Ok(()) => break,
                Err(err) => {
                    log::error!(
                        "Failed to confirm abort shard transfer operation on consensus: {err}"
                    );
                    continue;
                }
            }
        }

        result.map_err(|err| {
            CollectionError::service_error(format!(
                "Failed to abort shard transfer through consensus \
                 after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
            ))
        })
    }

    /// Set the shard replica state on this peer through consensus
    ///
    /// # Warning
    ///
    /// This only submits a proposal to consensus. Calling this does not guarantee that consensus
    /// will actually apply the operation across the cluster.
    async fn set_shard_replica_set_state(
        &self,
        collection_id: CollectionId,
        shard_id: ShardId,
        state: ReplicaState,
        from_state: Option<ReplicaState>,
    ) -> CollectionResult<()>;

    /// Propose to set the shard replica state on this peer, then confirm or retry.
    ///
    /// This internally confirms and retries a few times if needed to ensure consensus picks up the
    /// operation.
    async fn set_shard_replica_set_state_confirm_and_retry(
        &self,
        collection_id: &CollectionId,
        shard_id: ShardId,
        state: ReplicaState,
        from_state: Option<ReplicaState>,
    ) -> CollectionResult<()> {
        let mut result = Err(CollectionError::service_error(
            "`set_shard_replica_set_state_confirm_and_retry` exited without attempting any work, \
             this is a programming error",
        ));

        for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
            if attempt > 0 {
                sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
            }

            log::trace!("Propose and confirm set shard replica set state");
            result = self
                .set_shard_replica_set_state(collection_id.clone(), shard_id, state, from_state)
                .await;

            match &result {
                Ok(()) => break,
                Err(err) => {
                    log::error!(
                        "Failed to confirm set shard replica set state operation on consensus: {err}"
                    );
                    continue;
                }
            }
        }

        result.map_err(|err| {
            CollectionError::service_error(format!(
                "Failed to set shard replica set state through consensus \
                 after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
            ))
        })
    }

    /// Propose to commit the read hash ring.
    ///
    /// # Warning
    ///
    /// This only submits a proposal to consensus. Calling this does not guarantee that consensus
    /// will actually apply the operation across the cluster.
    async fn commit_read_hashring(
        &self,
        collection_id: CollectionId,
        reshard_key: ReshardKey,
    ) -> CollectionResult<()>;

    /// Propose to commit the read hash ring, then confirm or retry.
    ///
    /// This internally confirms and retries a few times if needed to ensure consensus picks up the
    /// operation.
    async fn commit_read_hashring_confirm_and_retry(
        &self,
        collection_id: &CollectionId,
        reshard_key: &ReshardKey,
    ) -> CollectionResult<()> {
        let mut result = Err(CollectionError::service_error(
            "`commit_read_hashring_confirm_and_retry` exited without attempting any work, \
             this is a programming error",
        ));

        for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
            if attempt > 0 {
                sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
            }

            log::trace!("Propose and confirm commit read hashring operation");
            result = self
                .commit_read_hashring(collection_id.into(), reshard_key.clone())
                .await;

            match &result {
                Ok(()) => break,
                Err(err) => {
                    log::error!(
                        "Failed to confirm commit read hashring operation on consensus: {err}"
                    );
                    continue;
                }
            }
        }

        result.map_err(|err| {
            CollectionError::service_error(format!(
                "Failed to commit read hashring through consensus \
                 after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
            ))
        })
    }

    /// Propose to commit the write hash ring.
    ///
    /// # Warning
    ///
    /// This only submits a proposal to consensus. Calling this does not guarantee that consensus
    /// will actually apply the operation across the cluster.
    async fn commit_write_hashring(
        &self,
        collection_id: CollectionId,
        reshard_key: ReshardKey,
    ) -> CollectionResult<()>;

    /// Propose to commit the write hash ring, then confirm or retry.
    ///
    /// This internally confirms and retries a few times if needed to ensure consensus picks up the
    /// operation.
    async fn commit_write_hashring_confirm_and_retry(
        &self,
        collection_id: &CollectionId,
        reshard_key: &ReshardKey,
    ) -> CollectionResult<()> {
        let mut result = Err(CollectionError::service_error(
            "`commit_write_hashring_confirm_and_retry` exited without attempting any work, \
             this is a programming error",
        ));

        for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
            if attempt > 0 {
                sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
            }

            log::trace!("Propose and confirm commit write hashring operation");
            result = self
                .commit_write_hashring(collection_id.into(), reshard_key.clone())
                .await;

            match &result {
                Ok(()) => break,
                Err(err) => {
                    log::error!(
                        "Failed to confirm commit write hashring operation on consensus: {err}"
                    );
                    continue;
                }
            }
        }

        result.map_err(|err| {
            CollectionError::service_error(format!(
                "Failed to commit write hashring through consensus \
                 after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
            ))
        })
    }

    /// Wait for all other peers to reach the current consensus
    ///
    /// This will take the current consensus state of this node. It then explicitly awaits on all
    /// other nodes to reach this consensus state.
    ///
    /// # Errors
    ///
    /// This errors if:
    /// - any of the peers is not on the same term
    /// - waiting takes longer than the specified timeout
    /// - any of the peers cannot be reached
    ///
    /// # Cancel safety
    ///
    /// This method is cancel safe.
    async fn await_consensus_sync(&self, channel_service: &ChannelService) -> CollectionResult<()> {
        let other_peer_count = channel_service.id_to_address.read().len().saturating_sub(1);
        if other_peer_count == 0 {
            log::warn!("There are no other peers, skipping consensus synchronization");
            return Ok(());
        }

        let (commit, term) = self.consensus_commit_term();
        log::trace!(
            "Waiting on {other_peer_count} peer(s) to reach consensus (commit: {commit}, term: {term}) \
             before finalizing shard transfer"
        );
        channel_service
            .await_commit_on_all_peers(
                self.this_peer_id(),
                commit,
                term,
                defaults::CONSENSUS_META_OP_WAIT,
            )
            .await
    }
}
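
/// A minimal sketch of the propose/confirm/retry pattern that the
/// `*_confirm_and_retry` methods above all follow. Illustrative only: the trait
/// methods inline this logic rather than calling a shared helper, and the
/// hypothetical `attempt_once` closure stands in for one propose-and-confirm
/// attempt against consensus.
#[allow(dead_code)]
async fn propose_confirm_with_retries<F, Fut>(
    mut attempt_once: F,
    description: &str,
) -> CollectionResult<()>
where
    F: FnMut() -> Fut + Send,
    Fut: std::future::Future<Output = CollectionResult<()>> + Send,
{
    // Seed with an error so that returning without any attempt is loudly visible.
    let mut result = Err(CollectionError::service_error(format!(
        "`propose_confirm_with_retries` ({description}) exited without attempting any work, \
         this is a programming error"
    )));

    for attempt in 0..CONSENSUS_CONFIRM_RETRIES {
        // Back off between attempts, but not before the first one.
        if attempt > 0 {
            sleep(CONSENSUS_CONFIRM_RETRY_DELAY).await;
        }

        result = attempt_once().await;
        match &result {
            Ok(()) => break,
            Err(err) => log::error!("Failed to confirm {description} on consensus: {err}"),
        }
    }

    // Wrap the last error with the retry count for context.
    result.map_err(|err| {
        CollectionError::service_error(format!(
            "Failed to {description} through consensus \
             after {CONSENSUS_CONFIRM_RETRIES} retries: {err}"
        ))
    })
}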