#[cfg(feature = "web")]
mod actix;
mod common;
mod consensus;
mod greeting;
mod issues_setup;
mod migrations;
mod settings;
mod snapshots;
mod startup;
mod tonic;
mod tracing;
use std::io::Error;
use std::sync::Arc;
use std::thread;
use std::thread::JoinHandle;
use std::time::Duration;
use ::common::cpu::{get_cpu_budget, CpuBudget};
use ::tonic::transport::Uri;
use api::grpc::transport_channel_pool::TransportChannelPool;
use clap::Parser;
use collection::shards::channel_service::ChannelService;
use consensus::Consensus;
use slog::Drain;
use startup::setup_panic_hook;
use storage::content_manager::consensus::operation_sender::OperationSender;
use storage::content_manager::consensus::persistent::Persistent;
use storage::content_manager::consensus_manager::{ConsensusManager, ConsensusStateRef};
use storage::content_manager::toc::dispatcher::TocDispatcher;
use storage::content_manager::toc::TableOfContent;
use storage::dispatcher::Dispatcher;
use storage::rbac::Access;
#[cfg(all(
not(target_env = "msvc"),
any(target_arch = "x86_64", target_arch = "aarch64")
))]
use tikv_jemallocator::Jemalloc;
use crate::common::helpers::{
create_general_purpose_runtime, create_search_runtime, create_update_runtime,
load_tls_client_config,
};
use crate::common::inference::service::InferenceService;
use crate::common::telemetry::TelemetryCollector;
use crate::common::telemetry_reporting::TelemetryReporter;
use crate::greeting::welcome;
use crate::migrations::single_to_cluster::handle_existing_collections;
use crate::settings::Settings;
use crate::snapshots::{recover_full_snapshot, recover_snapshots};
use crate::startup::{remove_started_file_indicator, touch_started_file_indicator};
#[cfg(all(
not(target_env = "msvc"),
any(target_arch = "x86_64", target_arch = "aarch64")
))]
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;
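/// Access token with full permissions, used by startup-time operations in `main` (listing collections, snapshot recovery).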
const FULL_ACCESS: Access = Access::full("For main");
/// Qdrant (read: quadrant) is a vector similarity search engine.
/// It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload.
///
/// This CLI starts a Qdrant peer/server.
#[derive(Parser, Debug)]
#[command(version, about)]
struct Args {
/// Uri of the peer to bootstrap from in case of multi-peer deployment.
/// If not specified, this peer will be considered the first in a new deployment.
#[arg(long, value_parser, value_name = "URI")]
bootstrap: Option<Uri>,
/// Uri of this peer.
/// Other peers should be able to reach it by this uri.
///
/// This value has to be supplied if this is the first peer in a new deployment.
///
/// If this is not the first peer and it bootstraps from another one, the value is optional.
/// If not supplied, qdrant will take the internal gRPC port from the config, and the bootstrap peer (receiving side) will derive this peer's IP address
#[arg(long, value_parser, value_name = "URI")]
uri: Option<Uri>,
/// Force snapshot re-creation.
/// If provided, existing collections will be replaced with snapshots.
/// Default is to not recreate from snapshots.
#[arg(short, long, action, default_value_t = false)]
force_snapshot: bool,
/// List of paths to snapshot files.
/// Format: <snapshot_file_path>:<target_collection_name>
///
/// WARN: Do not use this option if you are recovering a collection in an existing distributed cluster.
/// Use `/collections/<collection-name>/snapshots/recover` API instead.
#[arg(long, value_name = "PATH:NAME", alias = "collection-snapshot")]
snapshot: Option<Vec<String>>,
/// Path to snapshot of multiple collections.
/// Format: <snapshot_file_path>
///
/// WARN: Do not use this option if you are recovering a collection in an existing distributed cluster.
/// Use `/collections/<collection-name>/snapshots/recover` API instead.
#[arg(long, value_name = "PATH")]
storage_snapshot: Option<String>,
/// Path to an alternative configuration file.
/// Format: <config_file_path>
///
/// Default path: config/config.yaml
#[arg(long, value_name = "PATH")]
config_path: Option<String>,
/// Disable sending telemetry to the developers.
/// If provided, telemetry collection will be disabled.
/// Read more: <https://qdrant.tech/documentation/guides/telemetry>
#[arg(long, action, default_value_t = false)]
disable_telemetry: bool,
/// Run stacktrace collector. Used for debugging.
#[arg(long, action, default_value_t = false)]
stacktrace: bool,
/// Reinit consensus state.
/// When enabled, the service will assume the consensus should be reinitialized.
/// The exact behavior depends on whether this node has a bootstrap URI or not.
/// If it has one, it'll remove the current consensus state and consensus WAL (while keeping the peer ID)
/// and will try to receive state from the bootstrap peer.
/// If it doesn't, it'll remove other peers from the voters, promote
/// the current peer to leader and make it the single member of the cluster.
/// It'll also compact the consensus WAL to force a snapshot
#[arg(long, action, default_value_t = false)]
reinit: bool,
}
fn main() -> anyhow::Result<()> {
let args = Args::parse();
// Run the backtrace collector, expected to be used by the `rstack` crate
if args.stacktrace {
#[cfg(all(target_os = "linux", feature = "stacktrace"))]
{
let _ = rstack_self::child();
}
return Ok(());
}
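// Remove the `started` indicator file from a previous run; it is re-created at the end of startup (see `touch_started_file_indicator` below).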
remove_started_file_indicator();
let settings = Settings::new(args.config_path)?;
let reporting_enabled = !settings.telemetry_disabled && !args.disable_telemetry;
let reporting_id = TelemetryCollector::generate_id();
let logger_handle = tracing::setup(
settings
.logger
.with_top_level_directive(settings.log_level.clone()),
)?;
setup_panic_hook(reporting_enabled, reporting_id.to_string());
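// Apply the configured madvise policy to memory-mapped storage.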
memory::madvise::set_global(settings.storage.mmap_advice);
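// Enable or disable asynchronous vector scoring according to the performance settings.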
segment::vector_storage::common::set_async_scorer(
settings
.storage
.performance
.async_scorer
.unwrap_or_default(),
);
welcome(&settings);
if let Some(recovery_warning) = &settings.storage.recovery_mode {
log::warn!("Qdrant is loaded in recovery mode: {}", recovery_warning);
log::warn!(
"Read more: https://qdrant.tech/documentation/guides/administration/#recovery-mode"
);
}
// Validate as soon as possible, but we must initialize logging first
settings.validate_and_warn();
// Saved state of the consensus.
let persistent_consensus_state = Persistent::load_or_init(
&settings.storage.storage_path,
args.bootstrap.is_none(),
args.reinit,
)?;
let is_distributed_deployment = settings.cluster.enabled;
let temp_path = settings.storage.temp_path.as_deref();
let restored_collections = if let Some(full_snapshot) = args.storage_snapshot {
recover_full_snapshot(
temp_path,
&full_snapshot,
&settings.storage.storage_path,
args.force_snapshot,
persistent_consensus_state.this_peer_id(),
is_distributed_deployment,
)
} else if let Some(snapshots) = args.snapshot {
// recover from snapshots
recover_snapshots(
&snapshots,
args.force_snapshot,
temp_path,
&settings.storage.storage_path,
persistent_consensus_state.this_peer_id(),
is_distributed_deployment,
)
} else {
vec![]
};
// Create and own the search runtime outside of the async context to ensure its correct
// destruction
let search_runtime = create_search_runtime(settings.storage.performance.max_search_threads)
.expect("Can't create search runtime.");
let update_runtime =
create_update_runtime(settings.storage.performance.max_optimization_threads)
.expect("Can't create update runtime.");
let general_runtime =
create_general_purpose_runtime().expect("Can't create general purpose runtime.");
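// Handle to the general-purpose runtime, used below to spawn and block on async tasks.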
let runtime_handle = general_runtime.handle().clone();
// Use global CPU budget for optimizations based on settings
let optimizer_cpu_budget = CpuBudget::new(get_cpu_budget(
settings.storage.performance.optimizer_cpu_budget,
));
// Create a signal sender and receiver. It is used to communicate with the consensus thread.
let (propose_sender, propose_receiver) = std::sync::mpsc::channel();
let propose_operation_sender = if settings.cluster.enabled {
// High-level channel which could be used to send user-space consensus operations
Some(OperationSender::new(propose_sender))
} else {
// We don't need the sender in single-node mode
None
};
// Channel service is used to manage connections between peers.
// It allocates the required number of channels and handles reconnections properly
let mut channel_service =
ChannelService::new(settings.service.http_port, settings.service.api_key.clone());
if is_distributed_deployment {
// We only need channel_service if the cluster is enabled,
// so we initialize it with real values here
let p2p_grpc_timeout = Duration::from_millis(settings.cluster.grpc_timeout_ms);
let connection_timeout = Duration::from_millis(settings.cluster.connection_timeout_ms);
let tls_config = load_tls_client_config(&settings)?;
channel_service.channel_pool = Arc::new(TransportChannelPool::new(
p2p_grpc_timeout,
connection_timeout,
settings.cluster.p2p.connection_pool_size,
tls_config,
));
channel_service.id_to_address = persistent_consensus_state.peer_address_by_id.clone();
channel_service.id_to_metadata = persistent_consensus_state.peer_metadata_by_id.clone();
}
// The table of content manages the list of collections.
// It is the main entry point for the storage.
let toc = TableOfContent::new(
&settings.storage,
search_runtime,
update_runtime,
general_runtime,
optimizer_cpu_budget,
channel_service.clone(),
persistent_consensus_state.this_peer_id(),
propose_operation_sender.clone(),
);
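// Clean up temporary directories possibly left over from previous runs.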
toc.clear_all_tmp_directories()?;
// Here we load all stored collections.
runtime_handle.block_on(async {
for collection in toc.all_collections(&FULL_ACCESS).await {
log::debug!("Loaded collection: {collection}");
}
});
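// Wrap the ToC in an Arc so it can be shared with the dispatcher, consensus and the API servers.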
let toc_arc = Arc::new(toc);
let storage_path = toc_arc.storage_path();
// Holder for all actively running threads of the service: web, gRPC, consensus, etc.
let mut handles: Vec<JoinHandle<Result<(), Error>>> = vec![];
// Router for external queries.
// It decides whether a query should go directly to the ToC or through the consensus.
let mut dispatcher = Dispatcher::new(toc_arc.clone());
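// In distributed mode, wire up consensus, telemetry and the health checker; in single-node mode, only telemetry.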
let (telemetry_collector, dispatcher_arc, health_checker) = if is_distributed_deployment {
let consensus_state: ConsensusStateRef = ConsensusManager::new(
persistent_consensus_state,
toc_arc.clone(),
propose_operation_sender.unwrap(),
storage_path,
)
.into();
let is_new_deployment = consensus_state.is_new_deployment();
dispatcher = dispatcher.with_consensus(consensus_state.clone());
let toc_dispatcher = TocDispatcher::new(Arc::downgrade(&toc_arc), consensus_state.clone());
toc_arc.with_toc_dispatcher(toc_dispatcher);
let dispatcher_arc = Arc::new(dispatcher);
// Monitoring and telemetry.
let telemetry_collector =
TelemetryCollector::new(settings.clone(), dispatcher_arc.clone(), reporting_id);
let tonic_telemetry_collector = telemetry_collector.tonic_telemetry_collector.clone();
// The `raft` crate uses the `slog` crate, so `slog_stdlog::StdLog` is needed to forward
// its logs to the `log` crate
let slog_logger = slog::Logger::root(slog_stdlog::StdLog.fuse(), slog::o!());
// Runs raft consensus in a separate thread.
// Create a pipe `message_sender` to communicate with the consensus
let health_checker = Arc::new(common::health::HealthChecker::spawn(
toc_arc.clone(),
consensus_state.clone(),
&runtime_handle,
// NOTE: `wait_for_bootstrap` should be calculated *before* starting `Consensus` thread
consensus_state.is_new_deployment() && args.bootstrap.is_some(),
));
let handle = Consensus::run(
&slog_logger,
consensus_state.clone(),
args.bootstrap,
args.uri.map(|uri| uri.to_string()),
settings.clone(),
channel_service,
propose_receiver,
tonic_telemetry_collector,
toc_arc.clone(),
runtime_handle.clone(),
args.reinit,
)
.expect("Can't initialize consensus");
handles.push(handle);
let toc_arc_clone = toc_arc.clone();
let consensus_state_clone = consensus_state.clone();
let _cancel_transfer_handle = runtime_handle.spawn(async move {
consensus_state_clone.is_leader_established.await_ready();
match toc_arc_clone
.cancel_outgoing_all_transfers("Source peer restarted")
.await
{
Ok(_) => {
log::debug!("All transfers if any cancelled");
}
Err(err) => {
log::error!("Can't cancel outgoing transfers: {}", err);
}
}
});
// TODO(resharding): Remove resharding driver?
//
// runtime_handle.block_on(async {
// toc_arc.resume_resharding_tasks().await;
// });
let collections_to_recover_in_consensus = if is_new_deployment {
let existing_collections =
runtime_handle.block_on(toc_arc.all_collections(&FULL_ACCESS));
existing_collections
.into_iter()
.map(|pass| pass.name().to_string())
.collect()
} else {
restored_collections
};
if !collections_to_recover_in_consensus.is_empty() {
runtime_handle.block_on(handle_existing_collections(
toc_arc.clone(),
consensus_state.clone(),
dispatcher_arc.clone(),
consensus_state.this_peer_id(),
collections_to_recover_in_consensus,
));
}
(telemetry_collector, dispatcher_arc, Some(health_checker))
} else {
log::info!("Distributed mode disabled");
let dispatcher_arc = Arc::new(dispatcher);
// Monitoring and telemetry.
let telemetry_collector =
TelemetryCollector::new(settings.clone(), dispatcher_arc.clone(), reporting_id);
(telemetry_collector, dispatcher_arc, None)
};
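// Clone the gRPC telemetry collector before the main collector is moved behind a mutex below.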
let tonic_telemetry_collector = telemetry_collector.tonic_telemetry_collector.clone();
//
// Telemetry reporting
//
let reporting_id = telemetry_collector.reporting_id();
let telemetry_collector = Arc::new(tokio::sync::Mutex::new(telemetry_collector));
if reporting_enabled {
log::info!("Telemetry reporting enabled, id: {}", reporting_id);
runtime_handle.spawn(TelemetryReporter::run(telemetry_collector.clone()));
} else {
log::info!("Telemetry reporting disabled");
}
// Setup subscribers to listen for issue-able events
issues_setup::setup_subscribers(&settings);
// Helper to better log start errors
let log_err_if_any = |server_name, result| match result {
Err(err) => {
log::error!("Error while starting {} server: {}", server_name, err);
Err(err)
}
ok => ok,
};
//
// Inference Service
//
if let Some(inference_config) = settings.inference.clone() {
match InferenceService::init_global(inference_config) {
Ok(_) => {
log::info!("Inference service is configured.");
}
Err(err) => {
log::error!("{err}");
}
}
} else {
log::info!("Inference service is not configured.");
}
//
// REST API server
//
#[cfg(feature = "web")]
{
let dispatcher_arc = dispatcher_arc.clone();
let settings = settings.clone();
let handle = thread::Builder::new()
.name("web".to_string())
.spawn(move || {
log_err_if_any(
"REST",
actix::init(
dispatcher_arc.clone(),
telemetry_collector,
health_checker,
settings,
logger_handle,
),
)
})
.unwrap();
handles.push(handle);
}
//
// gRPC server
//
if let Some(grpc_port) = settings.service.grpc_port {
let settings = settings.clone();
let handle = thread::Builder::new()
.name("grpc".to_string())
.spawn(move || {
log_err_if_any(
"gRPC",
tonic::init(
dispatcher_arc,
tonic_telemetry_collector,
settings,
grpc_port,
runtime_handle,
),
)
})
.unwrap();
handles.push(handle);
} else {
log::info!("gRPC endpoint disabled");
}
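// Deadlock detection (only with the `service_debug` feature): periodically check parking_lot locks for deadlocks and log any that are found.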
#[cfg(feature = "service_debug")]
{
use std::fmt::Write;
use parking_lot::deadlock;
const DEADLOCK_CHECK_PERIOD: Duration = Duration::from_secs(10);
thread::Builder::new()
.name("deadlock_checker".to_string())
.spawn(move || loop {
thread::sleep(DEADLOCK_CHECK_PERIOD);
let deadlocks = deadlock::check_deadlock();
if deadlocks.is_empty() {
continue;
}
let mut error = format!("{} deadlocks detected\n", deadlocks.len());
for (i, threads) in deadlocks.iter().enumerate() {
writeln!(error, "Deadlock #{i}").expect("failed to writeln!");
for t in threads {
writeln!(
error,
"Thread Id {:#?}\n{:#?}",
t.thread_id(),
t.backtrace()
)
.expect("failed to writeln!");
}
}
log::error!("{}", error);
})
.unwrap();
}
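// All servers have been spawned; create the `started` indicator file.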
touch_started_file_indicator();
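// Wait for all service threads to finish, propagating errors if any.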
for handle in handles {
log::debug!(
"Waiting for thread {} to finish",
handle.thread().name().unwrap()
);
handle.join().expect("thread is not panicking")?;
}
drop(toc_arc);
drop(settings);
Ok(())
}