use std::fmt::Debug; | |
use std::sync::Arc; | |
use parking_lot::{Mutex as ParkingMutex, MutexGuard as ParkingMutexGuard}; | |
use thiserror::Error; | |
use tokio::sync::Mutex; | |
use crate::operations::{ClockTag, OperationWithClockTag}; | |
use crate::shards::local_shard::clock_map::{ClockMap, RecoveryPoint}; | |
use crate::wal::SerdeWal; | |
pub type LockedWal = Arc<ParkingMutex<SerdeWal<OperationWithClockTag>>>; | |
/// A WAL that is recoverable, with operations having clock tags and a corresponding clock map. | |
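///
/// A construction sketch (not compiled as a doc test), assuming the same shape as
/// `fixture_empty_wal` in the tests below: a `SerdeWal` behind a `parking_lot` mutex, paired
/// with two initially empty clock maps.
///
/// ```ignore
/// let wal = RecoverableWal::new(
///     Arc::new(ParkingMutex::new(serde_wal)),
///     Arc::new(Mutex::new(ClockMap::default())), // newest seen clocks
///     Arc::new(Mutex::new(ClockMap::default())), // cutoff (oldest recoverable) clocks
/// );
/// ```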
pub struct RecoverableWal { | |
pub(super) wal: LockedWal, | |
/// Map of all highest seen clocks for each peer and clock ID. | |
pub(super) newest_clocks: Arc<Mutex<ClockMap>>, | |
/// Map of all clocks and ticks that are cut off. | |
/// | |
/// Clock ticks equal to those in this map are still recoverable, while clock ticks below those | |
/// in this map are not. | |
/// | |
/// This means two things: | |
/// - this WAL has at least all these clock versions | |
/// - (so if we advance these clocks, we have to advance `newest_clocks` as well) | |
/// - this WAL cannot resolve any delta below any of these clocks | |
pub(super) oldest_clocks: Arc<Mutex<ClockMap>>, | |
} | |
impl RecoverableWal { | |
pub fn new( | |
wal: LockedWal, | |
highest_clocks: Arc<Mutex<ClockMap>>, | |
cutoff_clocks: Arc<Mutex<ClockMap>>, | |
) -> Self { | |
Self { | |
wal, | |
newest_clocks: highest_clocks, | |
oldest_clocks: cutoff_clocks, | |
} | |
} | |
// TODO: More meaningful method name and documentation | |
// | |
/// Write a record to the WAL, guarantee durability. | |
/// | |
/// On success, this returns the WAL record number of the written operation along with a WAL | |
/// lock guard. | |
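///
/// A usage sketch (not compiled as a doc test) mirroring the tests below. The clock tag on
/// `operation` may be corrected by the newest-clock map, so callers feed the corrected tick
/// back into their own clock:
///
/// ```ignore
/// let (op_num, _wal_guard) = wal.lock_and_write(&mut operation).await?;
/// clock.advance_to(operation.clock_tag.unwrap().clock_tick);
/// ```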
pub async fn lock_and_write<'a>( | |
&'a self, | |
operation: &mut OperationWithClockTag, | |
) -> crate::wal::Result<(u64, ParkingMutexGuard<'a, SerdeWal<OperationWithClockTag>>)> { | |
// Update last seen clock map and correct clock tag if necessary | |
if let Some(clock_tag) = &mut operation.clock_tag { | |
let operation_accepted = self | |
.newest_clocks | |
.lock() | |
.await | |
.advance_clock_and_correct_tag(clock_tag); | |
if !operation_accepted { | |
return Err(crate::wal::WalError::ClockRejected); | |
} | |
} | |
// Write operation to WAL | |
let mut wal_lock = self.wal.lock(); | |
wal_lock.write(operation).map(|op_num| (op_num, wal_lock)) | |
} | |
/// Update the cutoff clock map based on the given recovery point | |
/// | |
/// This can only increase clock ticks in the cutoff clock map. If there already are higher | |
/// clock ticks, they're kept. | |
/// | |
/// It also advances the highest seen clocks alongside it.
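///
/// A sketch of the full-transfer flow exercised in the tests below: after streaming all
/// records from node A to node B, B raises its cutoff to A's last seen clocks, so B no longer
/// offers to resolve deltas below that point:
///
/// ```ignore
/// b_wal.update_cutoff(&a_wal.recovery_point().await).await;
/// ```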
pub async fn update_cutoff(&self, cutoff: &RecoveryPoint) { | |
// Lock highest and cutoff maps separately to avoid deadlocks | |
{ | |
let mut newest_clocks = self.newest_clocks.lock().await; | |
for clock_tag in cutoff.iter_as_clock_tags() { | |
newest_clocks.advance_clock(clock_tag); | |
} | |
} | |
{ | |
let mut oldest_clocks = self.oldest_clocks.lock().await; | |
for clock_tag in cutoff.iter_as_clock_tags() { | |
oldest_clocks.advance_clock(clock_tag); | |
} | |
} | |
} | |
/// Get a recovery point for this WAL. | |
pub async fn recovery_point(&self) -> RecoveryPoint { | |
self.newest_clocks.lock().await.to_recovery_point() | |
} | |
pub async fn resolve_wal_delta( | |
&self, | |
recovery_point: RecoveryPoint, | |
) -> Result<Option<u64>, WalDeltaError> { | |
let newest_clocks = self.recovery_point().await; | |
let oldest_clocks = self.oldest_clocks.lock().await.to_recovery_point(); | |
resolve_wal_delta( | |
self.wal | |
.lock() | |
.read_all(true) | |
.map(|(op_num, op)| (op_num, op.clock_tag)), | |
recovery_point, | |
newest_clocks, | |
oldest_clocks, | |
) | |
} | |
pub fn wal_version(&self) -> Result<Option<u64>, WalDeltaError> { | |
let wal = self.wal.lock(); | |
if wal.is_empty() { | |
Ok(None) | |
} else { | |
Ok(Some(wal.last_index())) | |
} | |
} | |
/// Append records to this WAL from `other`, starting at operation `append_from` in `other`. | |
pub async fn append_from(&self, other: &Self, append_from: u64) -> crate::wal::Result<()> { | |
let mut operations = other | |
.wal | |
.lock() | |
.read(append_from) | |
.map(|(_, op)| op) | |
.collect::<Vec<_>>(); | |
for update in operations.iter_mut() { | |
let (_, _) = self.lock_and_write(update).await?; | |
} | |
Ok(()) | |
} | |
} | |
/// Resolve the WAL delta for the given `recovery_point` | |
/// | |
/// A `local_wal`, `newest_clocks` and `oldest_clocks` are required to resolve the
/// delta. These should come from the node that is the source of recovery, likely the current
/// one. The `local_wal` is used to resolve the diff. The `newest_clocks` are used to extend the
/// given recovery point with clocks the failed node does not know about. The `oldest_clocks`
/// are used as the lower bound for WAL delta resolution.
///
/// The delta can be sent to the node the recovery point came from, to restore its WAL and make
/// it consistent with the current shard.
/// | |
/// On success, an option holding a WAL record number is returned. | |
/// If `Some` - the remote WAL can be recovered by sending the local WAL from that record number. | |
/// If `None` - the remote WAL is already equal, and we don't have to send any records. | |
/// If `Err` - no delta can be resolved. | |
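///
/// A minimal sketch (not compiled as a doc test), using the `RecoveryPoint::insert(peer_id,
/// clock_id, tick)` helper from the tests below. Assume the local WAL holds records tagged
/// `1:0:1` through `1:0:4`, and `operations` is the `(op_num, clock_tag)` iterator read from it:
///
/// ```ignore
/// let mut recovery_point = RecoveryPoint::default();
/// recovery_point.insert(1, 0, 2); // recovering node has seen peer 1, clock 0, up to tick 2
///
/// let mut newest_clocks = RecoveryPoint::default();
/// newest_clocks.insert(1, 0, 4); // this node has seen the same clock up to tick 4
///
/// // Resolution scrolls back to the record carrying tick 3, so the delta covers ticks 3 and 4
/// // (`op_num_of_tick_3` is a placeholder for that record's number)
/// let delta = resolve_wal_delta(operations, recovery_point, newest_clocks, RecoveryPoint::default());
/// assert_eq!(delta, Ok(Some(op_num_of_tick_3)));
/// ```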
fn resolve_wal_delta( | |
operations: impl DoubleEndedIterator<Item = (u64, Option<ClockTag>)>, | |
mut recovery_point: RecoveryPoint, | |
mut newest_clocks: RecoveryPoint, | |
mut oldest_clocks: RecoveryPoint, | |
) -> Result<Option<u64>, WalDeltaError> { | |
// If recovery point is empty, we cannot do a diff transfer | |
if recovery_point.is_empty() { | |
return Err(WalDeltaError::Empty); | |
} | |
// If the recovery point has clocks our current node does not know about | |
// we're missing essential records and cannot resolve a WAL delta | |
if recovery_point.has_clocks_not_in(&newest_clocks) { | |
return Err(WalDeltaError::UnknownClocks); | |
} | |
// If our current node has any lower clock than the recovery point specifies, | |
// we're missing essential records and cannot resolve a WAL delta | |
if recovery_point.has_any_newer_clocks_than(&newest_clocks) { | |
return Err(WalDeltaError::HigherThanCurrent); | |
} | |
// From this point on, increase all clocks by one
// We must do this so that clock tick 0 can be specified as needing everything from that clock
recovery_point.increase_all_clocks_by(1); | |
newest_clocks.increase_all_clocks_by(1); | |
oldest_clocks.increase_all_clocks_by(1); | |
// Extend the recovery point with missing clocks this node knows about
// Ensure the recovering node gets records for a clock it might not have seen yet | |
recovery_point.initialize_clocks_missing_from(&newest_clocks); | |
// Remove clocks that are equal to this node, we don't have to transfer records for them | |
// TODO: do we want to remove higher clocks too, as the recovery node already has all data? | |
recovery_point.remove_clocks_equal_to(&newest_clocks); | |
// Recovery point may not be below our cutoff point | |
if recovery_point.has_any_older_clocks_than(&oldest_clocks) { | |
return Err(WalDeltaError::Cutoff); | |
} | |
// If there are no clocks left, the WALs match up, so we don't have to recover anything
if recovery_point.is_empty() { | |
return Ok(None); | |
} | |
// Scroll back over the WAL and find a record that covers all clocks, allowing delta resolution
// Drain satisfied clocks from the recovery point until we have nothing left | |
log::trace!("Resolving WAL delta for: {recovery_point}"); | |
let mut last_op_num = None; | |
for (op_num, clock_tag) in operations.rev() { | |
// We cannot resolve a delta if we have untagged records | |
let Some(clock_tag) = clock_tag else { | |
return Err(WalDeltaError::UntaggedRecords); | |
}; | |
// Keep scrolling until we have no clocks left | |
let removed_equal = recovery_point.remove_clock_if_newer_or_equal_to_tag(clock_tag); | |
if recovery_point.is_empty() { | |
// If we only removed newer clocks, delta-ing from the previous record is good enough | |
let recover_from = if removed_equal { | |
Some(op_num) | |
} else { | |
last_op_num | |
}; | |
return Ok(recover_from); | |
} | |
last_op_num.replace(op_num); | |
} | |
Err(WalDeltaError::NotFound) | |
} | |
#[derive(Error, Debug, Clone, PartialEq, Eq)]
pub enum WalDeltaError {
    #[error("recovery point has no clocks to resolve a delta for")]
    Empty,
    #[error("recovery point requests clocks this WAL does not know about")]
    UnknownClocks,
    #[error("recovery point requests clocks that are higher than any in this WAL")]
    HigherThanCurrent,
    #[error("some recovery point clocks are below the cutoff point of this WAL")]
    Cutoff,
    #[error("cannot resolve a delta over WAL records without clock tags")]
    UntaggedRecords,
    #[error("cannot find a slice of WAL records that satisfies the recovery point")]
    NotFound,
}
#[cfg(test)]
mod tests {
use std::collections::{HashMap, HashSet, VecDeque}; | |
use std::ops::Range; | |
use std::sync::Arc; | |
use parking_lot::Mutex as ParkingMutex; | |
use rand::rngs::StdRng; | |
use rand::seq::SliceRandom; | |
use rand::{Rng, SeedableRng}; | |
use rstest::rstest; | |
use segment::data_types::vectors::VectorStructInternal; | |
use tempfile::{Builder, TempDir}; | |
use wal::WalOptions; | |
use super::*; | |
use crate::operations::point_ops::{ | |
PointInsertOperationsInternal, PointOperations, PointStructPersisted, | |
}; | |
use crate::operations::{ClockTag, CollectionUpdateOperations, OperationWithClockTag}; | |
use crate::shards::local_shard::clock_map::{ClockMap, RecoveryPoint}; | |
use crate::shards::replica_set::clock_set::ClockSet; | |
use crate::wal::SerdeWal; | |
fn fixture_empty_wal() -> (RecoverableWal, TempDir) { | |
let dir = Builder::new().prefix("wal_test").tempdir().unwrap(); | |
let options = WalOptions { | |
segment_capacity: 1024 * 1024, | |
segment_queue_len: 0, | |
}; | |
let wal = SerdeWal::new(dir.path().to_str().unwrap(), options).unwrap(); | |
( | |
RecoverableWal::new( | |
Arc::new(ParkingMutex::new(wal)), | |
Arc::new(Mutex::new(ClockMap::default())), | |
Arc::new(Mutex::new(ClockMap::default())), | |
), | |
dir, | |
) | |
} | |
fn mock_operation(id: u64) -> CollectionUpdateOperations { | |
CollectionUpdateOperations::PointOperation(PointOperations::UpsertPoints( | |
PointInsertOperationsInternal::PointsList(vec![PointStructPersisted { | |
id: id.into(), | |
vector: VectorStructInternal::from(vec![1.0, 2.0, 3.0]).into(), | |
payload: None, | |
}]), | |
)) | |
} | |
/// Test WAL delta resolution with just one missed operation on node C. | |
/// | |
/// See: <https://www.notion.so/qdrant/Testing-suite-4e28a978ec05476080ff26ed07757def?pvs=4> | |
#[tokio::test]
async fn test_resolve_wal_delta_one_operation() {
// Create WALs for peer A, B and C | |
let (a_wal, _a_wal_dir) = fixture_empty_wal(); | |
let (b_wal, _b_wal_dir) = fixture_empty_wal(); | |
let (c_wal, _c_wal_dir) = fixture_empty_wal(); | |
// Create clock set for peer A, start first clock from 1 | |
let mut a_clock_set = ClockSet::new(); | |
a_clock_set.get_clock().advance_to(0); | |
// Create operation on peer A | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let clock_tick = a_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(1, a_clock_0.id(), clock_tick); | |
let bare_operation = mock_operation(1); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operation to peer A, B and C, and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let mut c_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
let (_, _) = c_wal.lock_and_write(&mut c_operation).await.unwrap(); | |
a_clock_0.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(c_operation.clock_tag.unwrap().clock_tick); | |
drop(a_clock_0); | |
// Create operation on peer A | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let clock_tick = a_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(1, a_clock_0.id(), clock_tick); | |
let bare_operation = mock_operation(2); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operation to peer A and B, not C, and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
a_clock_0.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
drop(a_clock_0); | |
let c_recovery_point = c_wal.recovery_point().await; | |
// Resolve delta on node A for node C, assert correctness | |
let delta_from = a_wal | |
.resolve_wal_delta(c_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, 1); | |
// Resolve delta on node B for node C, assert correctness | |
let delta_from = b_wal | |
.resolve_wal_delta(c_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, 1); | |
// Diff should have 1 operation, as C missed just one | |
assert_eq!(b_wal.wal.lock().read(delta_from).count(), 1); | |
// Recover WAL on node C by writing delta from node B to it | |
c_wal.append_from(&b_wal, delta_from).await.unwrap(); | |
// WALs should match up perfectly now | |
a_wal | |
.wal | |
.lock() | |
.read(0) | |
.zip(b_wal.wal.lock().read(0)) | |
.zip(c_wal.wal.lock().read(0)) | |
.for_each(|((a, b), c)| { | |
assert_eq!(a, b); | |
assert_eq!(b, c); | |
}); | |
assert_wal_ordering_property(&a_wal, false).await; | |
assert_wal_ordering_property(&b_wal, false).await; | |
assert_wal_ordering_property(&c_wal, false).await; | |
} | |
/// Test WAL delta resolution when there are gaps in the WAL on all machines.
/// | |
/// We normally do not expect this situation. But it's good to support it if it happens | |
/// unexpectedly. | |
/// | |
/// See: <https://www.notion.so/qdrant/Testing-suite-4e28a978ec05476080ff26ed07757def?pvs=4> | |
#[rstest]
#[case(false)]
#[case(true)]
#[tokio::test]
async fn test_resolve_wal_delta_with_gaps(#[case] with_gap: bool) {
const N: usize = 5; | |
const GAP_SIZE: usize = 10; | |
// Create WALs for peer A, B and C | |
let (a_wal, _a_wal_dir) = fixture_empty_wal(); | |
let (b_wal, _b_wal_dir) = fixture_empty_wal(); | |
let (c_wal, _c_wal_dir) = fixture_empty_wal(); | |
// Create clock set for peer A, start first clock from 1 | |
let mut a_clock_set = ClockSet::new(); | |
a_clock_set.get_clock().advance_to(0); | |
// Create N operations on peer A | |
for n in 0..N { | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let clock_tick = a_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(1, a_clock_0.id(), clock_tick); | |
let bare_operation = mock_operation((1 + n) as u64); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operation to peer A, B and C and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let mut c_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
let (_, _) = c_wal.lock_and_write(&mut c_operation).await.unwrap(); | |
a_clock_0.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(c_operation.clock_tag.unwrap().clock_tick); | |
} | |
// Introduce a gap in the clocks on A | |
if with_gap { | |
for _ in 0..GAP_SIZE { | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let clock_tick = a_clock_0.tick_once(); | |
a_clock_0.advance_to(clock_tick); | |
} | |
} | |
// Create N operations on peer A, which are missed on node C | |
for n in 0..N { | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let clock_tick = a_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(1, a_clock_0.id(), clock_tick); | |
let bare_operation = mock_operation((1 + N + n) as u64); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operation to peer A and B and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
a_clock_0.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
} | |
let c_recovery_point = c_wal.recovery_point().await; | |
// Resolve delta on node A for node C, assert correctness | |
let delta_from = a_wal | |
.resolve_wal_delta(c_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, N as u64); | |
// Resolve delta on node B for node C, assert correctness | |
let delta_from = b_wal | |
.resolve_wal_delta(c_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, N as u64); | |
// Diff should have N operations, as C missed exactly N of them
assert_eq!(b_wal.wal.lock().read(delta_from).count(), N); | |
// Recover WAL on node C by writing delta from node B to it | |
c_wal.append_from(&b_wal, delta_from).await.unwrap(); | |
// WALs should match up perfectly now | |
a_wal | |
.wal | |
.lock() | |
.read(0) | |
.zip(b_wal.wal.lock().read(0)) | |
.zip(c_wal.wal.lock().read(0)) | |
.for_each(|((a, b), c)| { | |
assert_eq!(a, b); | |
assert_eq!(b, c); | |
}); | |
assert_wal_ordering_property(&a_wal, true).await; | |
assert_wal_ordering_property(&b_wal, true).await; | |
assert_wal_ordering_property(&c_wal, true).await; | |
} | |
/// Test WAL delta resolution with many missed operations on node C.
/// | |
/// See: <https://www.notion.so/qdrant/Testing-suite-4e28a978ec05476080ff26ed07757def?pvs=4> | |
#[tokio::test]
async fn test_resolve_wal_delta_many_operations() {
const N: usize = 5; | |
const M: usize = 25; | |
// Create WALs for peer A, B and C | |
let (a_wal, _a_wal_dir) = fixture_empty_wal(); | |
let (b_wal, _b_wal_dir) = fixture_empty_wal(); | |
let (c_wal, _c_wal_dir) = fixture_empty_wal(); | |
// Create clock set for peer A, start first clock from 1 | |
let mut a_clock_set = ClockSet::new(); | |
a_clock_set.get_clock().advance_to(0); | |
// Create N operations on peer A | |
for i in 0..N { | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let clock_tick = a_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(1, a_clock_0.id(), clock_tick); | |
let bare_operation = mock_operation(i as u64); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operations to peer A, B and C, and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let mut c_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
let (_, _) = c_wal.lock_and_write(&mut c_operation).await.unwrap(); | |
a_clock_0.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(c_operation.clock_tag.unwrap().clock_tick); | |
} | |
// Create M operations on peer A, which are missed on node C | |
for i in N..N + M { | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let clock_tick = a_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(1, a_clock_0.id(), clock_tick); | |
let bare_operation = mock_operation(i as u64); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operations to peer A and B, not C, and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
a_clock_0.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
} | |
let c_recovery_point = c_wal.recovery_point().await; | |
// Resolve delta on node A for node C, assert correctness | |
let delta_from = a_wal | |
.resolve_wal_delta(c_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, N as u64); | |
// Resolve delta on node B for node C, assert correctness | |
let delta_from = b_wal | |
.resolve_wal_delta(c_recovery_point) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, N as u64); | |
// Diff should have M operations, as node C missed M operations | |
assert_eq!(b_wal.wal.lock().read(delta_from).count(), M); | |
// Recover WAL on node C by writing delta from node B to it | |
c_wal.append_from(&b_wal, delta_from).await.unwrap(); | |
// WALs should match up perfectly now | |
a_wal | |
.wal | |
.lock() | |
.read(0) | |
.zip(b_wal.wal.lock().read(0)) | |
.zip(c_wal.wal.lock().read(0)) | |
.for_each(|((a, b), c)| { | |
assert_eq!(a, b); | |
assert_eq!(b, c); | |
}); | |
assert_wal_ordering_property(&a_wal, false).await; | |
assert_wal_ordering_property(&b_wal, false).await; | |
assert_wal_ordering_property(&c_wal, false).await; | |
} | |
/// Test WAL delta resolution with many intermixed operations on node C. Intermixed as in, | |
/// from multiple nodes. | |
/// | |
/// See: <https://www.notion.so/qdrant/Testing-suite-4e28a978ec05476080ff26ed07757def?pvs=4> | |
#[tokio::test]
async fn test_resolve_wal_delta_many_intermixed_operations() {
const N: usize = 3; | |
const M: usize = 50; | |
// Create WALs for peer A, B and C | |
let (a_wal, _a_wal_dir) = fixture_empty_wal(); | |
let (b_wal, _b_wal_dir) = fixture_empty_wal(); | |
let (c_wal, _c_wal_dir) = fixture_empty_wal(); | |
// Create clock sets for peer A and B | |
let mut a_clock_set = ClockSet::new(); | |
let mut b_clock_set = ClockSet::new(); | |
// Create N operations on peer A | |
for i in 0..N { | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
a_clock_0.advance_to(0); | |
let clock_tick = a_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(1, a_clock_0.id(), clock_tick); | |
let bare_operation = mock_operation(i as u64); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operations to peer A, B and C, and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let mut c_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
let (_, _) = c_wal.lock_and_write(&mut c_operation).await.unwrap(); | |
a_clock_0.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(c_operation.clock_tag.unwrap().clock_tick); | |
} | |
// Create M operations on peers A and B, which are missed on node C | |
for i in N..N + M { | |
let is_node_a = i % 3 == 0; | |
let peer_id = if is_node_a { 1 } else { 2 }; | |
let mut clock = if is_node_a { | |
a_clock_set.get_clock() | |
} else { | |
b_clock_set.get_clock() | |
}; | |
clock.advance_to(0); | |
let clock_tick = clock.tick_once(); | |
let clock_tag = ClockTag::new(peer_id, clock.id(), clock_tick); | |
let bare_operation = mock_operation(i as u64); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operations to peer A and B, not C, and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
clock.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
clock.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
} | |
let c_recovery_point = c_wal.recovery_point().await; | |
// Resolve delta on node A for node C, assert correctness | |
let delta_from = a_wal | |
.resolve_wal_delta(c_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, N as u64); | |
// Resolve delta on node B for node C, assert correctness | |
let delta_from = b_wal | |
.resolve_wal_delta(c_recovery_point) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, N as u64); | |
// Diff should have M operations, as node C missed M operations | |
assert_eq!(b_wal.wal.lock().read(delta_from).count(), M); | |
// Recover WAL on node C by writing delta from node B to it | |
c_wal.append_from(&b_wal, delta_from).await.unwrap(); | |
// WALs should match up perfectly now | |
a_wal | |
.wal | |
.lock() | |
.read(0) | |
.zip(b_wal.wal.lock().read(0)) | |
.zip(c_wal.wal.lock().read(0)) | |
.for_each(|((a, b), c)| { | |
assert_eq!(a, b); | |
assert_eq!(b, c); | |
}); | |
assert_wal_ordering_property(&a_wal, false).await; | |
assert_wal_ordering_property(&b_wal, false).await; | |
assert_wal_ordering_property(&c_wal, false).await; | |
} | |
/// Test WAL delta resolution with operations in a different order on node A and B. | |
/// | |
/// See: <https://www.notion.so/qdrant/Testing-suite-4e28a978ec05476080ff26ed07757def?pvs=4> | |
#[tokio::test]
async fn test_resolve_wal_delta_unordered_operations() {
// Create WALs for peer A, B and C | |
let (a_wal, _a_wal_dir) = fixture_empty_wal(); | |
let (b_wal, _b_wal_dir) = fixture_empty_wal(); | |
let (c_wal, _c_wal_dir) = fixture_empty_wal(); | |
// Create clock sets for peer A and B, start first clocks from 1 | |
let mut a_clock_set = ClockSet::new(); | |
let mut b_clock_set = ClockSet::new(); | |
a_clock_set.get_clock().advance_to(0); | |
b_clock_set.get_clock().advance_to(0); | |
// Create operation on peer A | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let clock_tick = a_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(1, a_clock_0.id(), clock_tick); | |
let bare_operation = mock_operation(1); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operation to peer A, B and C, and advance clocks | |
let mut a_operation = operation.clone(); | |
let mut b_operation = operation.clone(); | |
let mut c_operation = operation.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation).await.unwrap(); | |
let (_, _) = c_wal.lock_and_write(&mut c_operation).await.unwrap(); | |
a_clock_0.advance_to(a_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(b_operation.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(c_operation.clock_tag.unwrap().clock_tick); | |
drop(a_clock_0); | |
// Create operations on nodes A and B | |
let mut a_clock_0 = a_clock_set.get_clock(); | |
let mut b_clock_0 = b_clock_set.get_clock(); | |
let a_clock_tick = a_clock_0.tick_once(); | |
let b_clock_tick = b_clock_0.tick_once(); | |
let a_clock_tag = ClockTag::new(1, a_clock_0.id(), a_clock_tick); | |
let b_clock_tag = ClockTag::new(2, a_clock_0.id(), b_clock_tick); | |
let bare_operation_1 = mock_operation(2); | |
let bare_operation_2 = mock_operation(3); | |
let operation_1 = OperationWithClockTag::new(bare_operation_1, Some(a_clock_tag)); | |
let operation_2 = OperationWithClockTag::new(bare_operation_2, Some(b_clock_tag)); | |
// Write operations to nodes A and B in different order, but not to node C | |
let mut a_operation_1 = operation_1.clone(); | |
let mut a_operation_2 = operation_2.clone(); | |
let mut b_operation_1 = operation_1.clone(); | |
let mut b_operation_2 = operation_2.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation_1).await.unwrap(); | |
let (_, _) = a_wal.lock_and_write(&mut a_operation_2).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation_2).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut b_operation_1).await.unwrap(); | |
a_clock_0.advance_to(a_operation_1.clock_tag.unwrap().clock_tick); | |
a_clock_0.advance_to(a_operation_2.clock_tag.unwrap().clock_tick); | |
b_clock_0.advance_to(b_operation_2.clock_tag.unwrap().clock_tick); | |
b_clock_0.advance_to(b_operation_1.clock_tag.unwrap().clock_tick); | |
drop(a_clock_0); | |
drop(b_clock_0); | |
let c_recovery_point = c_wal.recovery_point().await; | |
// Resolve delta on node A for node C, assert correctness | |
let delta_from = a_wal | |
.resolve_wal_delta(c_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, 1); | |
// Resolve delta on node B for node C, assert correctness | |
let delta_from = b_wal | |
.resolve_wal_delta(c_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
assert_eq!(delta_from, 1); | |
// Diff should have 2 operations on both nodes | |
assert_eq!(a_wal.wal.lock().read(delta_from).count(), 2); | |
assert_eq!(b_wal.wal.lock().read(delta_from).count(), 2); | |
// Recover WAL on node C by writing delta from node B to it | |
c_wal.append_from(&b_wal, delta_from).await.unwrap(); | |
// WAL on node B and C will match, A is in different order | |
assert!(!a_wal | |
.wal | |
.lock() | |
.read(0) | |
.zip(c_wal.wal.lock().read(0)) | |
.all(|(a, c)| a == c)); | |
assert!(b_wal | |
.wal | |
.lock() | |
.read(0) | |
.zip(c_wal.wal.lock().read(0)) | |
.all(|(b, c)| b == c)); | |
// All WALs should have 3 operations | |
assert_eq!(a_wal.wal.lock().read(0).count(), 3); | |
assert_eq!(b_wal.wal.lock().read(0).count(), 3); | |
assert_eq!(c_wal.wal.lock().read(0).count(), 3); | |
// All WALs must have operations for point 1, 2 and 3 | |
let get_point = |op| match op { | |
OperationWithClockTag { | |
operation: | |
CollectionUpdateOperations::PointOperation(PointOperations::UpsertPoints( | |
PointInsertOperationsInternal::PointsList(points), | |
)), | |
.. | |
} => points[0].clone(), | |
_ => unreachable!(), | |
}; | |
let a_wal_point_ids = a_wal | |
.wal | |
.lock() | |
.read(0) | |
.map(|(_, op)| get_point(op).id) | |
.collect::<HashSet<_>>(); | |
let b_wal_point_ids = b_wal | |
.wal | |
.lock() | |
.read(0) | |
.map(|(_, op)| get_point(op).id) | |
.collect::<HashSet<_>>(); | |
let c_wal_point_ids = c_wal | |
.wal | |
.lock() | |
.read(0) | |
.map(|(_, op)| get_point(op).id) | |
.collect::<HashSet<_>>(); | |
(1..=3).for_each(|i| { | |
assert!(a_wal_point_ids.contains(&i.into())); | |
assert!(b_wal_point_ids.contains(&i.into())); | |
assert!(c_wal_point_ids.contains(&i.into())); | |
}); | |
assert_wal_ordering_property(&a_wal, false).await; | |
assert_wal_ordering_property(&b_wal, false).await; | |
assert_wal_ordering_property(&c_wal, false).await; | |
} | |
#[tokio::test]
async fn test_recover_from_previously_recovered_with_forward_proxy() {
// Consider a situation | |
// Steps: | |
// | |
// 1. We initialize 2 operations on A and B, that are successfully written to both from C | |
// 2. Operation 3 is written to A, but not B | |
// 2.1. Operation 30 is written to A, but not B (Second channel) | |
// 3. Operation 4,5 from D is written to both A and B | |
// 4. Now B is reported as failed and we need to recover it from A | |
// 5. During recovery, we send untagged operations from A to B (full transfer) + node C sends an Update | |
// 6. After recovered (need to check consistency of A and B), node D sends an Update to A and B | |
// 7. Now we want to recover newly created node E from B, which is expected to fail because of the cutoff point
// 8. Try to recover A from E (expect no diff, as both have same data) | |
// 9. Insert new operation to B but not A to make sure diff resolution starts working after full recovery | |
// Recover | |
// ┌───┬───────►┌───┐ | |
// │ A │ │ B │ | |
// └─▲─┴───────►└─▲─┘ | |
// │ Forward │ | |
// │ │ | |
// │ │ | |
// │ ┌───┐ │ | |
// └───┤ C ├────┘ | |
// Update └───┘ Failed Update | |
// | |
// | |
// | |
// ┌───┐ | |
// ┌───┤ D ├────┐ | |
// Update│ └───┘ │Update | |
// │ │ | |
// │ │ | |
// ┌─▼─┐ ┌─▼─┐ | |
// │ A │ │ B │ | |
// └───┘ └───┘ | |
// | |
// | |
// (Almost Empty) | |
// ┌───┐Recover ┌───┐ | |
// │ B ├───────►│ E │ | |
// └───┘ └───┘ | |
// | |
// | |
// (Identical) | |
// ┌───┐Recover ┌───┐ | |
// │ E ├───────►│ A │ | |
// └───┘ └───┘ | |
let (a_wal, _a_wal_dir) = fixture_empty_wal(); | |
let (b_wal, _b_wal_dir) = fixture_empty_wal(); | |
let (e_wal, _e_wal_dir) = fixture_empty_wal(); | |
let mut c_clock_set = ClockSet::new(); | |
let mut d_clock_set = ClockSet::new(); | |
let node_c_peer_id = 1; | |
let node_d_peer_id = 2; | |
let op1: CollectionUpdateOperations = mock_operation(1); | |
// Initial normal operation, written to both A and B, and additionally to E, which we will need later
{ | |
// Node C is sending updates to A and B | |
let mut c_clock_0 = c_clock_set.get_clock(); | |
c_clock_0.advance_to(0); | |
let clock_tick = c_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(node_c_peer_id, c_clock_0.id(), clock_tick); | |
let operation_with_clock = OperationWithClockTag::new(op1, Some(clock_tag)); | |
let mut operation_a = operation_with_clock.clone(); | |
let mut operation_b = operation_with_clock.clone(); | |
let mut operation_e = operation_with_clock.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut operation_a).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut operation_b).await.unwrap(); | |
let (_, _) = e_wal.lock_and_write(&mut operation_e).await.unwrap(); | |
c_clock_0.advance_to(operation_a.clock_tag.unwrap().clock_tick); | |
c_clock_0.advance_to(operation_b.clock_tag.unwrap().clock_tick); | |
c_clock_0.advance_to(operation_e.clock_tag.unwrap().clock_tick); | |
} | |
let op2: CollectionUpdateOperations = mock_operation(2); | |
// Initial normal operation, written to both | |
{ | |
// Node C is sending updates to A and B | |
let mut c_clock_0 = c_clock_set.get_clock(); | |
c_clock_0.advance_to(0); | |
let clock_tick = c_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(node_c_peer_id, c_clock_0.id(), clock_tick); | |
let operation_with_clock = OperationWithClockTag::new(op2, Some(clock_tag)); | |
let mut operation_a = operation_with_clock.clone(); | |
let mut operation_b = operation_with_clock.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut operation_a).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut operation_b).await.unwrap(); | |
c_clock_0.advance_to(operation_a.clock_tag.unwrap().clock_tick); | |
c_clock_0.advance_to(operation_b.clock_tag.unwrap().clock_tick); | |
} | |
let op3: CollectionUpdateOperations = mock_operation(3); | |
let op30: CollectionUpdateOperations = mock_operation(30); | |
// Next operation gets written to A, but not B | |
{ | |
// Node C is sending updates to A and B | |
let mut c_clock_0 = c_clock_set.get_clock(); | |
let mut c_clock_1 = c_clock_set.get_clock(); | |
c_clock_0.advance_to(0); | |
c_clock_1.advance_to(0); | |
{ | |
// First parallel operation | |
let clock_tick = c_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(node_c_peer_id, c_clock_0.id(), clock_tick); | |
let operation_with_clock = OperationWithClockTag::new(op3, Some(clock_tag)); | |
let mut operation_a = operation_with_clock.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut operation_a).await.unwrap(); | |
c_clock_0.advance_to(operation_a.clock_tag.unwrap().clock_tick); | |
} | |
{ | |
// Second parallel operation | |
let clock_tick = c_clock_1.tick_once(); | |
let clock_tag = ClockTag::new(node_c_peer_id, c_clock_1.id(), clock_tick); | |
let operation_with_clock = OperationWithClockTag::new(op30, Some(clock_tag)); | |
let mut operation_a = operation_with_clock.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut operation_a).await.unwrap(); | |
c_clock_1.advance_to(operation_a.clock_tag.unwrap().clock_tick); | |
} | |
} | |
let op4: CollectionUpdateOperations = mock_operation(4); | |
// Node D sends an update to both A and B, both successfully written | |
{ | |
let mut d_clock_0 = d_clock_set.get_clock(); | |
d_clock_0.advance_to(0); | |
let clock_tick = d_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(node_d_peer_id, d_clock_0.id(), clock_tick); | |
let operation_with_clock = OperationWithClockTag::new(op4, Some(clock_tag)); | |
let mut operation_a = operation_with_clock.clone(); | |
let mut operation_b = operation_with_clock.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut operation_a).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut operation_b).await.unwrap(); | |
d_clock_0.advance_to(operation_a.clock_tag.unwrap().clock_tick); | |
d_clock_0.advance_to(operation_b.clock_tag.unwrap().clock_tick); | |
} | |
let op5: CollectionUpdateOperations = mock_operation(5); | |
// Node D sends an update to both A and B, both successfully written | |
{ | |
let mut d_clock_0 = d_clock_set.get_clock(); | |
d_clock_0.advance_to(0); | |
let clock_tick = d_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(node_d_peer_id, d_clock_0.id(), clock_tick); | |
let operation_with_clock = OperationWithClockTag::new(op5, Some(clock_tag)); | |
let mut operation_a = operation_with_clock.clone(); | |
let mut operation_b = operation_with_clock.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut operation_a).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut operation_b).await.unwrap(); | |
d_clock_0.advance_to(operation_a.clock_tag.unwrap().clock_tick); | |
d_clock_0.advance_to(operation_b.clock_tag.unwrap().clock_tick); | |
} | |
// Now B is reported as failed and we need to recover it from A | |
let b_recovery_point = b_wal.recovery_point().await; | |
let delta_from = a_wal | |
.resolve_wal_delta(b_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
// Operations 0 and 1 are written to both and do not need to be recovered
// All further operations have to be written to B | |
assert_eq!(delta_from, 2); | |
// But instead of recovering from WAL, we will check full streaming transfer | |
{ | |
let op1 = mock_operation(1); | |
let op1_with_clock = OperationWithClockTag::new(op1, None); | |
let (_, _) = b_wal | |
.lock_and_write(&mut op1_with_clock.clone()) | |
.await | |
.unwrap(); | |
let op2 = mock_operation(2); | |
let op2_with_clock = OperationWithClockTag::new(op2, None); | |
let (_, _) = b_wal | |
.lock_and_write(&mut op2_with_clock.clone()) | |
.await | |
.unwrap(); | |
} | |
let op6: CollectionUpdateOperations = mock_operation(6); | |
// In between the recovery, we have a new update from C | |
// It is written to both A and B, plus forwarded to B with forward proxy | |
{ | |
let mut c_clock_0 = c_clock_set.get_clock(); | |
c_clock_0.advance_to(0); | |
let clock_tick = c_clock_0.tick_once(); | |
let clock_tag = ClockTag::new(node_c_peer_id, c_clock_0.id(), clock_tick); | |
let operation_with_clock = OperationWithClockTag::new(op6, Some(clock_tag)); | |
let mut operation_a = operation_with_clock.clone(); | |
let mut operation_b = operation_with_clock.clone(); | |
let mut operation_b_forward = operation_with_clock.clone(); | |
let (_, _) = a_wal.lock_and_write(&mut operation_a).await.unwrap(); | |
let (_, _) = b_wal.lock_and_write(&mut operation_b).await.unwrap(); | |
let (_, _) = b_wal | |
.lock_and_write(&mut operation_b_forward) | |
.await | |
.unwrap(); | |
c_clock_0.advance_to(operation_a.clock_tag.unwrap().clock_tick); | |
c_clock_0.advance_to(operation_b.clock_tag.unwrap().clock_tick); | |
} | |
// Continue recovery | |
{ | |
let op3 = mock_operation(3); | |
let op3_with_clock = OperationWithClockTag::new(op3, None); | |
let (_, _) = b_wal | |
.lock_and_write(&mut op3_with_clock.clone()) | |
.await | |
.unwrap(); | |
let op30 = mock_operation(30); | |
let op30_with_clock = OperationWithClockTag::new(op30, None); | |
let (_, _) = b_wal | |
.lock_and_write(&mut op30_with_clock.clone()) | |
.await | |
.unwrap(); | |
let op4 = mock_operation(4); | |
let op4_with_clock = OperationWithClockTag::new(op4, None); | |
let (_, _) = b_wal | |
.lock_and_write(&mut op4_with_clock.clone()) | |
.await | |
.unwrap(); | |
let op5 = mock_operation(5); | |
let op5_with_clock = OperationWithClockTag::new(op5, None); | |
let (_, _) = b_wal | |
.lock_and_write(&mut op5_with_clock.clone()) | |
.await | |
.unwrap(); | |
// Once full transfer is done, we update cutoff point on B to the last seen of A | |
b_wal.update_cutoff(&a_wal.recovery_point().await).await; | |
} | |
// Try to recover E from B | |
let e_recovery_point = e_wal.recovery_point().await; | |
// Cannot recover E from B, because B has a high cutoff point due to the full transfer | |
let delta_from = b_wal.resolve_wal_delta(e_recovery_point.clone()).await; | |
assert_eq!(delta_from.unwrap_err(), WalDeltaError::Cutoff); | |
// Try to recover A from B | |
// Which should also fail, because B has a high cutoff point due to the full transfer | |
let a_recovery_point = a_wal.recovery_point().await; | |
/* | |
a_recovery_point = RecoveryPoint { | |
clocks: { | |
C_1: 1, | |
C: 4, | |
D: 2, | |
}, | |
} | |
cutoff_point_b = RecoveryPoint { | |
clocks: { | |
C_1: 1, | |
C: 4, | |
D: 2, | |
}, | |
} | |
*/ | |
let delta_from = b_wal.resolve_wal_delta(a_recovery_point.clone()).await; | |
// No diff expected | |
assert_eq!(delta_from, Ok(None)); | |
let op7 = mock_operation(7); | |
// Add operation to B but not A | |
{ | |
// Node D is sending updates to B | |
let mut d_clock = d_clock_set.get_clock(); | |
d_clock.advance_to(0); | |
// First parallel operation | |
let clock_tick = d_clock.tick_once(); | |
let clock_tag = ClockTag::new(node_d_peer_id, d_clock.id(), clock_tick); | |
let operation_with_clock = OperationWithClockTag::new(op7, Some(clock_tag)); | |
let mut operation_b = operation_with_clock.clone(); | |
let (_, _) = b_wal.lock_and_write(&mut operation_b).await.unwrap(); | |
d_clock.advance_to(operation_b.clock_tag.unwrap().clock_tick); | |
} | |
let a_recovery_point = a_wal.recovery_point().await; | |
let delta_from = b_wal | |
.resolve_wal_delta(a_recovery_point.clone()) | |
.await | |
.unwrap() | |
.unwrap(); | |
// Diff expected | |
assert_eq!(b_wal.wal.lock().read(delta_from).count(), 1); | |
assert_wal_ordering_property(&a_wal, false).await; | |
assert_wal_ordering_property(&b_wal, false).await; | |
assert_wal_ordering_property(&e_wal, false).await; | |
} | |
/// A randomized and more extensive test for resolving a WAL delta. | |
/// | |
/// This tests configurations from 2 up to 10 nodes. | |
/// | |
/// This randomizes: | |
/// - The number of operations | |
/// - What node is used as entry point | |
/// - What node dies | |
/// | |
/// This test does the following 25 times: | |
/// - insert a random number of operations on all nodes
/// - randomly kill a number of nodes (or rather, mark them as killed)
/// - write a random number of operations to all other nodes
/// - recover the killed nodes | |
/// - assert correctness | |
/// | |
/// See: <https://www.notion.so/qdrant/Testing-suite-4e28a978ec05476080ff26ed07757def?pvs=4> | |
// NOTE: the original `#[case]` values were lost; these are illustrative, covering the
// 2 up to 10 node configurations mentioned in the doc comment above.
#[rstest]
#[case(2, 1..2)]
#[case(5, 1..3)]
#[case(10, 1..5)]
#[tokio::test]
async fn test_resolve_wal_delta_randomized(
    #[case] node_count: usize,
    #[case] dead_nodes_range: Range<usize>,
) {
let mut rng = StdRng::seed_from_u64(42); | |
let mut point_id_source = 1..; | |
// Create WALs, clock sets and clock maps for each node | |
let mut wals = std::iter::repeat_with(fixture_empty_wal) | |
.take(node_count) | |
.collect::<Vec<_>>(); | |
let mut clock_sets = std::iter::repeat_with(ClockSet::new) | |
.take(node_count) | |
.collect::<Vec<_>>(); | |
// A list of clocks we don't release for some iterations | |
let mut kept_clocks = vec![]; | |
// 25 times: | |
// - insert a random number of operations on all nodes
// - randomly kill a number of nodes (or rather, mark them as killed)
// - write a random number of operations to all other nodes
// - recover the killed nodes
// - assert correctness | |
for _ in 0..25 { | |
// Insert random number of operations on all nodes | |
for _ in 0..rng.gen_range(0..10) { | |
let entrypoint = rng.gen_range(0..node_count); | |
let mut clock = clock_sets[entrypoint].get_clock(); | |
clock.advance_to(0); | |
let clock_tick = clock.tick_once(); | |
let clock_tag = ClockTag::new(entrypoint as u64, clock.id(), clock_tick); | |
let bare_operation = | |
CollectionUpdateOperations::PointOperation(PointOperations::UpsertPoints( | |
PointInsertOperationsInternal::PointsList(vec![PointStructPersisted { | |
id: point_id_source.next().unwrap().into(), | |
vector: VectorStructInternal::from( | |
std::iter::repeat_with(|| rng.gen::<f32>()) | |
.take(3) | |
.collect::<Vec<_>>(), | |
) | |
.into(), | |
payload: None, | |
}]), | |
)); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operations to all WALs | |
for (wal, _wal_dir) in wals.iter_mut() { | |
let mut operation = operation.clone(); | |
let (_, _) = wal.lock_and_write(&mut operation).await.unwrap(); | |
clock.advance_to(operation.clock_tag.unwrap().clock_tick); | |
} | |
// Maybe keep the clock for some iterations | |
let keep_clock_for = rng.gen_range(0..3); | |
if keep_clock_for > 0 { | |
kept_clocks.push((keep_clock_for, clock)); | |
} | |
} | |
// Make a random list of alive and dead nodes | |
let mut alive_nodes = (0..node_count).collect::<Vec<_>>(); | |
alive_nodes.shuffle(&mut rng); | |
let dead_nodes = alive_nodes | |
.drain(0..rng.gen_range(dead_nodes_range.clone())) | |
.collect::<HashSet<_>>(); | |
// Insert random number of operations into all alive nodes | |
let operation_count = rng.gen_range(0..100); | |
for _ in 0..operation_count { | |
let entrypoint = *alive_nodes.choose(&mut rng).unwrap(); | |
let mut clock = clock_sets[entrypoint].get_clock(); | |
clock.advance_to(0); | |
let clock_tick = clock.tick_once(); | |
let clock_tag = ClockTag::new(entrypoint as u64, clock.id(), clock_tick); | |
let bare_operation = | |
CollectionUpdateOperations::PointOperation(PointOperations::UpsertPoints( | |
PointInsertOperationsInternal::PointsList(vec![PointStructPersisted { | |
id: point_id_source.next().unwrap().into(), | |
vector: VectorStructInternal::from( | |
std::iter::repeat_with(|| rng.gen::<f32>()) | |
.take(3) | |
.collect::<Vec<_>>(), | |
) | |
.into(), | |
payload: None, | |
}]), | |
)); | |
let operation = OperationWithClockTag::new(bare_operation, Some(clock_tag)); | |
// Write operations to all WALs and clock maps on alive node | |
for &alive_node in &alive_nodes { | |
let mut operation = operation.clone(); | |
let (_, _) = wals[alive_node] | |
.0 | |
.lock_and_write(&mut operation) | |
.await | |
.unwrap(); | |
clock.advance_to(operation.clock_tag.unwrap().clock_tick); | |
} | |
// Maybe keep the clock for some iterations | |
let keep_clock_for = rng.gen_range(0..10); | |
if keep_clock_for > 0 { | |
kept_clocks.push((keep_clock_for, clock)); | |
} | |
} | |
// Recover dead nodes | |
for dead_node in dead_nodes { | |
// Resolve WAL on every alive node, to recover the dead node | |
let recovery_point = wals[dead_node].0.recovery_point().await; | |
let mut from_deltas = HashSet::new(); | |
for &alive_node in &alive_nodes { | |
let delta_from = wals[alive_node] | |
.0 | |
.resolve_wal_delta(recovery_point.clone()) | |
.await | |
.expect("failed to resolve WAL delta on alive node"); | |
from_deltas.insert(delta_from); | |
} | |
assert_eq!(from_deltas.len(), 1, "found different delta starting points in different WALs, while all should be the same"); | |
let delta_from = from_deltas.into_iter().next().unwrap(); | |
assert_eq!( | |
delta_from.is_some(), | |
operation_count > 0, | |
"if we had operations to some node, we must find a delta, otherwise not", | |
); | |
// Recover WAL on the dead node from a random alive node | |
if let Some(delta_from) = delta_from { | |
let alive_node = *alive_nodes.choose(&mut rng).unwrap(); | |
wals[dead_node] | |
.0 | |
.append_from(&wals[alive_node].0, delta_from) | |
.await | |
.unwrap(); | |
} | |
} | |
// All WALs must be equal, having exactly the same entries | |
wals.iter() | |
.map(|wal| wal.0.wal.lock()) | |
.collect::<Vec<_>>() | |
.windows(2) | |
.for_each(|wals| { | |
assert!( | |
wals[0].read(0).eq(wals[1].read(0)), | |
"all WALs must have the same entries", | |
); | |
}); | |
// Release some kept clocks | |
kept_clocks.retain_mut(|(keep_for, _)| {
    *keep_for -= 1;
    *keep_for > 0
});
} | |
for (wal, _) in wals { | |
assert_wal_ordering_property(&wal, false).await; | |
} | |
} | |
/// We cannot resolve a WAL delta if the slice contains records without a clock tag. | |
#[tokio::test]
async fn test_cannot_resolve_delta_over_untagged_record() {
let (wal, _wal_dir) = fixture_empty_wal(); | |
// Insert 3 operations with clocks | |
let (_, _) = wal | |
.lock_and_write(&mut OperationWithClockTag::new( | |
mock_operation(1), | |
Some(ClockTag::new(1, 0, 1)), | |
)) | |
.await | |
.unwrap(); | |
let (_, _) = wal | |
.lock_and_write(&mut OperationWithClockTag::new( | |
mock_operation(2), | |
Some(ClockTag::new(1, 0, 2)), | |
)) | |
.await | |
.unwrap(); | |
let (_, _) = wal | |
.lock_and_write(&mut OperationWithClockTag::new( | |
mock_operation(3), | |
Some(ClockTag::new(1, 0, 3)), | |
)) | |
.await | |
.unwrap(); | |
// Can resolve a delta for the last clock | |
let mut recovery_point = RecoveryPoint::default(); | |
recovery_point.insert(1, 0, 2); | |
let resolve_result = wal.resolve_wal_delta(recovery_point).await.unwrap(); | |
assert_eq!(resolve_result, Some(2)); | |
// Insert operation 4 and 5, where operation 4 does not have a clock tag | |
let (_, _) = wal | |
.lock_and_write(&mut OperationWithClockTag::new(mock_operation(4), None)) | |
.await | |
.unwrap(); | |
let (_, _) = wal | |
.lock_and_write(&mut OperationWithClockTag::new( | |
mock_operation(5), | |
Some(ClockTag::new(1, 0, 4)), | |
)) | |
.await | |
.unwrap(); | |
// Can still resolve a delta for the last clock | |
let mut recovery_point = RecoveryPoint::default(); | |
recovery_point.insert(1, 0, 4); | |
let resolve_result = wal.resolve_wal_delta(recovery_point).await.unwrap(); | |
assert_eq!(resolve_result, None); | |
// Cannot resolve a delta for our previous clock, it now has an untagged record after it | |
let mut recovery_point = RecoveryPoint::default(); | |
recovery_point.insert(1, 0, 2); | |
let resolve_result = wal.resolve_wal_delta(recovery_point).await; | |
assert_eq!(resolve_result.unwrap_err(), WalDeltaError::UntaggedRecords); | |
} | |
/// Empty recovery point should not resolve any diff. | |
#[test]
fn test_empty_recovery_point() {
let (wal, _wal_dir) = fixture_empty_wal(); | |
// Empty recovery points, should not resolve any diff | |
let recovery_point = RecoveryPoint::default(); | |
let newest_clocks = RecoveryPoint::default(); | |
let resolve_result = resolve_wal_delta( | |
wal.wal | |
.lock() | |
.read_all(true) | |
.map(|(op_num, op)| (op_num, op.clock_tag)), | |
recovery_point, | |
newest_clocks, | |
RecoveryPoint::default(), | |
); | |
assert_eq!(resolve_result.unwrap_err(), WalDeltaError::Empty); | |
} | |
/// Recovery point with a clock our source does not know about cannot resolve a diff. | |
#[test]
fn test_recover_point_has_unknown_clock() {
let (wal, _wal_dir) = fixture_empty_wal(); | |
let mut recovery_point = RecoveryPoint::default(); | |
let mut newest_clocks = RecoveryPoint::default(); | |
// Recovery point has a clock our source does not know about | |
recovery_point.insert(1, 0, 15); | |
recovery_point.insert(1, 1, 8); | |
recovery_point.insert(2, 1, 5); | |
newest_clocks.insert(1, 0, 20); | |
newest_clocks.insert(1, 1, 8); | |
let resolve_result = resolve_wal_delta( | |
wal.wal | |
.lock() | |
.read_all(true) | |
.map(|(op_num, op)| (op_num, op.clock_tag)), | |
recovery_point, | |
newest_clocks, | |
RecoveryPoint::default(), | |
); | |
assert_eq!(resolve_result.unwrap_err(), WalDeltaError::UnknownClocks); | |
} | |
/// Recovery point with higher clocks than the source cannot resolve a diff. | |
#[test]
fn test_recover_point_higher_than_source() {
let (wal, _wal_dir) = fixture_empty_wal(); | |
let mut recovery_point = RecoveryPoint::default(); | |
let mut newest_clocks = RecoveryPoint::default(); | |
// Recovery point asks tick 10, but source only has tick 8 | |
recovery_point.insert(1, 0, 15); | |
recovery_point.insert(1, 1, 10); | |
newest_clocks.insert(1, 0, 20); | |
newest_clocks.insert(1, 1, 8); | |
let resolve_result = resolve_wal_delta( | |
wal.wal | |
.lock() | |
.read_all(true) | |
.map(|(op_num, op)| (op_num, op.clock_tag)), | |
recovery_point, | |
newest_clocks, | |
RecoveryPoint::default(), | |
); | |
assert_eq!( | |
resolve_result.unwrap_err(), | |
WalDeltaError::HigherThanCurrent | |
); | |
} | |
/// Recovery point requests clocks that are already truncated | |
#[test]
fn test_recover_point_cutoff() {
let (wal, _wal_dir) = fixture_empty_wal(); | |
let mut recovery_point = RecoveryPoint::default(); | |
let mut newest_clocks = RecoveryPoint::default(); | |
let mut oldest_clocks = RecoveryPoint::default(); | |
// Recovery point asks clock tick that has been truncated already | |
recovery_point.insert(1, 0, 15); | |
recovery_point.insert(1, 1, 10); | |
newest_clocks.insert(1, 0, 20); | |
newest_clocks.insert(1, 1, 12); | |
oldest_clocks.insert(1, 0, 16); | |
let resolve_result = resolve_wal_delta( | |
wal.wal | |
.lock() | |
.read_all(true) | |
.map(|(op_num, op)| (op_num, op.clock_tag)), | |
recovery_point, | |
newest_clocks, | |
oldest_clocks, | |
); | |
assert_eq!(resolve_result.unwrap_err(), WalDeltaError::Cutoff); | |
} | |
/// Recovery point operations are not in our WAL. | |
#[test]
fn test_recover_point_not_in_wal() {
let (wal, _wal_dir) = fixture_empty_wal(); | |
let mut recovery_point = RecoveryPoint::default(); | |
let mut newest_clocks = RecoveryPoint::default(); | |
// Recovery point asks for clocks whose records are not in our (empty) WAL
recovery_point.insert(1, 0, 15); | |
recovery_point.insert(1, 1, 10); | |
newest_clocks.insert(1, 0, 20); | |
newest_clocks.insert(1, 1, 12); | |
let resolve_result = resolve_wal_delta( | |
wal.wal | |
.lock() | |
.read_all(true) | |
.map(|(op_num, op)| (op_num, op.clock_tag)), | |
recovery_point, | |
newest_clocks, | |
RecoveryPoint::default(), | |
); | |
assert_eq!(resolve_result.unwrap_err(), WalDeltaError::NotFound); | |
} | |
/// Assert that the WAL satisfies `check_clock_tag_ordering_property`.
async fn assert_wal_ordering_property(wal: &RecoverableWal, allow_gaps: bool) { | |
// Grab list of clock tags from WAL records, skip non-existent or below cutoff tags | |
let clock_tags = { | |
let cutoff = wal.oldest_clocks.lock().await; | |
wal.wal | |
.lock() | |
.read(0) | |
// Only take records with clock tags | |
.filter_map(|(_, operation)| operation.clock_tag) | |
// Clock tags must be equal or higher to cutoff point | |
.filter(|clock_tag| { | |
cutoff | |
.current_tick(clock_tag.peer_id, clock_tag.clock_id) | |
.map_or(true, |cutoff_tick| clock_tag.clock_tick >= cutoff_tick) | |
}) | |
.collect::<Vec<_>>() | |
}; | |
check_clock_tag_ordering_property(&clock_tags, allow_gaps) | |
.expect("WAL ordering property violated"); | |
} | |
/// Test that we satisfy the clock ordering property, allowing WAL recovery resolution. | |
/// | |
/// Property: | |
/// For each operation with peer+clock tick X, all following operations having the same | |
/// peer+clock must cover ticks X+1, X+2, ..., X+n in order up to the highest tick value of | |
/// that peer+clock in the WAL. | |
/// | |
/// More specifically, this tests the property against every clock tag. The result of this
/// check is that each sequence ends in order, going up to the highest clock tick of that
/// peer+clock.
/// | |
/// `allow_gaps` specifies whether gaps in the clock tick sequences are allowed. | |
/// | |
/// This logic is validated with examples in `validate_clock_tag_ordering_property`. | |
/// | |
/// This property may not be valid if a diff transfer has not been resolved correctly or | |
/// completely, or if the WAL got malformed in another way. | |
fn check_clock_tag_ordering_property( | |
clock_tags: &[ClockTag], | |
allow_gaps: bool, | |
) -> Result<(), String> { | |
// Get the highest clock value for each clock+peer | |
let mut highest_clocks = HashMap::new(); | |
for clock_tag in clock_tags { | |
highest_clocks | |
.entry((clock_tag.peer_id, clock_tag.clock_id)) | |
.and_modify(|highest| *highest = clock_tag.clock_tick.max(*highest)) | |
.or_insert(clock_tag.clock_tick); | |
} | |
// Test each clock tag for the ordering property | |
for (i, clock_tag) in clock_tags.iter().enumerate() { | |
let key = (clock_tag.peer_id, clock_tag.clock_id); | |
let highest = highest_clocks[&key]; | |
// An ordered list of ticks we must see for this peer+clock | |
let mut must_see_ticks = | |
((clock_tag.clock_tick + 1)..=highest).collect::<VecDeque<_>>(); | |

            // For all the following clock tags of the same peer+clock, remove their tick value
            for newer in clock_tags.iter().skip(i + 1) {
                // Skip other peer and clock pairs
                if (newer.peer_id, newer.clock_id) != key {
                    continue;
                }

                // Keep removing ticks we must see from the beginning of the list
                // If we don't allow gaps, we only remove this exact tick from the beginning
                // If we do allow gaps, we remove this tick and all lower ones from the beginning
                while must_see_ticks.front().map_or(false, |&tick| {
                    if allow_gaps {
                        tick <= newer.clock_tick
                    } else {
                        tick == newer.clock_tick
                    }
                }) {
                    must_see_ticks.pop_front().unwrap();
                }
            }

            // If the list is not empty, we have not seen all required ticks
            if !must_see_ticks.is_empty() {
                return Err(format!(
                    "following clock tags did not cover ticks [{}] in order (peer_id: {}, clock_id: {}, max_tick: {highest})",
                    must_see_ticks.into_iter().map(|tick| tick.to_string()).collect::<Vec<_>>().join(", "),
                    clock_tag.peer_id,
                    clock_tag.clock_id,
                ));
            }
        }

        Ok(())
    }

    /// Validate that `check_clock_tag_ordering_property` works as expected.
    ///
    /// Yes, this is a test for a test for a test. (⌐■_■)
    fn validate_clock_tag_ordering_property(allow_gaps: bool) {
        // Empty is fine
        check_clock_tag_ordering_property(&[], allow_gaps).unwrap();

        // Any one clock tag is fine
        check_clock_tag_ordering_property(&[ClockTag::new(1, 2, 3)], allow_gaps).unwrap();

        // Clock tags in order are allowed
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 3),
            ],
            allow_gaps,
        )
        .unwrap();

        // Clock tags in order with gaps are only allowed if specified
        let result = check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                // Misses 1:0:3-9
                ClockTag::new(1, 0, 10),
                ClockTag::new(1, 0, 11),
            ],
            allow_gaps,
        );
        assert_eq!(result.is_ok(), allow_gaps);

        // Not starting at zero (truncated) is allowed
        check_clock_tag_ordering_property(
            &[
                // Truncated
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 3),
                ClockTag::new(1, 0, 4),
            ],
            allow_gaps,
        )
        .unwrap();

        // Repeated clock tags are allowed
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 2),
            ],
            allow_gaps,
        )
        .unwrap();

        // Repeating clock tag sequence is allowed
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                // Repeats 1:0:0-2 two more times
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
            ],
            allow_gaps,
        )
        .unwrap();

        // Repeating part of clock tag sequence is allowed
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 3),
                // Repeats 1:0:2-3 two more times
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 3),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 3),
            ],
            allow_gaps,
        )
        .unwrap();

        // Repeating clock tag sequence with new ones at the end is allowed
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                // Repeats 1:0:0-2 one more time
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                // Adds 1:0:3-6 on top of it
                ClockTag::new(1, 0, 3),
                ClockTag::new(1, 0, 4),
                ClockTag::new(1, 0, 5),
                ClockTag::new(1, 0, 6),
            ],
            allow_gaps,
        )
        .unwrap();

        // Repeating clock tags in random order is allowed, as long as the end is in order
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                // Repeats 1:0:0-2 a few more times in random order
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                // Adds 1:0:3 on top of it
                ClockTag::new(1, 0, 3),
            ],
            allow_gaps,
        )
        .unwrap();

        // Repeating clock tag sequence must not miss clock tags at the end
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                // Misses 1:0:2
            ],
            allow_gaps,
        )
        .unwrap_err();

        // Repeating clock tag sequence must only miss clock tags in the middle if specified
        let result = check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 0, 0),
                // Misses 1:0:1
                ClockTag::new(1, 0, 2),
            ],
            allow_gaps,
        );
        assert_eq!(result.is_ok(), allow_gaps);

        // Skipping a clock ID is allowed
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                // Skipped clock ID 1
                ClockTag::new(1, 2, 10),
                ClockTag::new(1, 2, 11),
                ClockTag::new(1, 2, 12),
            ],
            allow_gaps,
        )
        .unwrap();

        // Intermixed repeating clock tag sequence is allowed
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 1, 0),
                ClockTag::new(2, 0, 0),
                ClockTag::new(2, 0, 1),
                ClockTag::new(2, 0, 2),
                ClockTag::new(1, 1, 1),
                ClockTag::new(2, 0, 0),
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 1, 2),
                ClockTag::new(1, 1, 3),
                ClockTag::new(2, 0, 1),
                ClockTag::new(1, 0, 1),
                ClockTag::new(2, 0, 2),
                ClockTag::new(1, 1, 4),
                ClockTag::new(1, 0, 2),
                ClockTag::new(1, 1, 5),
            ],
            allow_gaps,
        )
        .unwrap();

        // Intermixed clock tag sequence where one tick for peer 2 is missing is only allowed if specified
        let result = check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(2, 0, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(2, 0, 1),
                ClockTag::new(1, 0, 2),
                // Misses 2:0:2
                ClockTag::new(1, 0, 3),
                ClockTag::new(2, 0, 3),
            ],
            allow_gaps,
        );
        assert_eq!(result.is_ok(), allow_gaps);

        // Intermixed clock tag sequence where one tick for clock ID 1 is missing is only allowed if specified
        let result = check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(1, 1, 0),
                ClockTag::new(1, 0, 1),
                ClockTag::new(1, 1, 1),
                ClockTag::new(1, 0, 2),
                // Misses 1:1:2
                ClockTag::new(1, 0, 3),
                ClockTag::new(1, 1, 3),
            ],
            allow_gaps,
        );
        assert_eq!(result.is_ok(), allow_gaps);

        // Intermixed clock tag sequence where peer 2 never reaches its highest tick again is
        // not allowed, even with gaps
        check_clock_tag_ordering_property(
            &[
                ClockTag::new(1, 0, 0),
                ClockTag::new(2, 0, 0),
                ClockTag::new(3, 0, 0),
                ClockTag::new(3, 0, 1),
                ClockTag::new(1, 0, 1),
                ClockTag::new(2, 0, 1),
                ClockTag::new(3, 0, 2),
                ClockTag::new(2, 0, 2),
                ClockTag::new(1, 0, 2),
                // Peer 2 only partially recovering here, missing 2:0:2
                ClockTag::new(2, 0, 0),
                ClockTag::new(2, 0, 1),
                // Peer 1 and 3 continue
                ClockTag::new(1, 0, 3),
                ClockTag::new(1, 0, 4),
                ClockTag::new(3, 0, 3),
                ClockTag::new(3, 0, 4),
            ],
            allow_gaps,
        )
        .unwrap_err();
    }
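
    // Hedged sketch, not part of the original module: `validate_clock_tag_ordering_property`
    // takes `allow_gaps` as a parameter, so it needs `#[test]` drivers. If equivalent drivers
    // already exist elsewhere in this file, the two below are merely illustrative duplicates
    // of that pattern.
    #[test]
    fn validate_ordering_property_without_gaps_sketch() {
        validate_clock_tag_ordering_property(false);
    }

    #[test]
    fn validate_ordering_property_with_gaps_sketch() {
        validate_clock_tag_ordering_property(true);
    }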
}