Spaces:
Build error
Build error
use prometheus::proto::{Counter, Gauge, LabelPair, Metric, MetricFamily, MetricType}; | |
use prometheus::TextEncoder; | |
use segment::common::operation_time_statistics::OperationDurationStatistics; | |
use crate::common::telemetry::TelemetryData; | |
use crate::common::telemetry_ops::app_telemetry::{AppBuildTelemetry, AppFeaturesTelemetry}; | |
use crate::common::telemetry_ops::cluster_telemetry::{ClusterStatusTelemetry, ClusterTelemetry}; | |
use crate::common::telemetry_ops::collections_telemetry::{ | |
CollectionTelemetryEnum, CollectionsTelemetry, | |
}; | |
use crate::common::telemetry_ops::memory_telemetry::MemoryTelemetry; | |
use crate::common::telemetry_ops::requests_telemetry::{ | |
GrpcTelemetry, RequestsTelemetry, WebApiTelemetry, | |
}; | |
/// Whitelist for REST endpoints in metrics output.
///
/// Only requests whose path is listed here are reported. The list is a selection of
/// point-level endpoints (upsert, count, delete, discover, facet, payload, query,
/// recommend, scroll, search and vector operations) plus the collection index endpoint.
///
/// This array *must* be sorted: membership is checked with a binary search.
const REST_ENDPOINT_WHITELIST: &[&str] = &[
    "/collections/{name}/index",
    "/collections/{name}/points",
    "/collections/{name}/points/batch",
    "/collections/{name}/points/count",
    "/collections/{name}/points/delete",
    "/collections/{name}/points/discover",
    "/collections/{name}/points/discover/batch",
    "/collections/{name}/points/facet",
    "/collections/{name}/points/payload",
    "/collections/{name}/points/payload/clear",
    "/collections/{name}/points/payload/delete",
    "/collections/{name}/points/query",
    "/collections/{name}/points/query/batch",
    "/collections/{name}/points/query/groups",
    "/collections/{name}/points/recommend",
    "/collections/{name}/points/recommend/batch",
    "/collections/{name}/points/recommend/groups",
    "/collections/{name}/points/scroll",
    "/collections/{name}/points/search",
    "/collections/{name}/points/search/batch",
    "/collections/{name}/points/search/groups",
    "/collections/{name}/points/search/matrix/offsets",
    "/collections/{name}/points/search/matrix/pairs",
    "/collections/{name}/points/vectors",
    "/collections/{name}/points/vectors/delete",
];
/// Whitelist for gRPC endpoints in metrics output.
///
/// Only calls whose method path is listed here are reported. The list is a selection of
/// `qdrant.Points` methods (upsert, get, count, delete, discover, facet, payload, query,
/// recommend, scroll, search and update operations).
///
/// This array *must* be sorted: membership is checked with a binary search.
const GRPC_ENDPOINT_WHITELIST: &[&str] = &[
    "/qdrant.Points/ClearPayload",
    "/qdrant.Points/Count",
    "/qdrant.Points/Delete",
    "/qdrant.Points/DeletePayload",
    "/qdrant.Points/Discover",
    "/qdrant.Points/DiscoverBatch",
    "/qdrant.Points/Facet",
    "/qdrant.Points/Get",
    "/qdrant.Points/OverwritePayload",
    "/qdrant.Points/Query",
    "/qdrant.Points/QueryBatch",
    "/qdrant.Points/QueryGroups",
    "/qdrant.Points/Recommend",
    "/qdrant.Points/RecommendBatch",
    "/qdrant.Points/RecommendGroups",
    "/qdrant.Points/Scroll",
    "/qdrant.Points/Search",
    "/qdrant.Points/SearchBatch",
    "/qdrant.Points/SearchGroups",
    "/qdrant.Points/SetPayload",
    "/qdrant.Points/UpdateBatch",
    "/qdrant.Points/UpdateVectors",
    "/qdrant.Points/Upsert",
];
/// HTTP response status for which REST timing metrics are reported.
///
/// Responses with any other status only contribute to the response counters.
const REST_TIMINGS_FOR_STATUS: u16 = 200;
/// Encapsulates metrics data in Prometheus format. | |
pub struct MetricsData { | |
metrics: Vec<MetricFamily>, | |
} | |
impl MetricsData { | |
pub fn format_metrics(&self) -> String { | |
TextEncoder::new().encode_to_string(&self.metrics).unwrap() | |
} | |
} | |
impl From<TelemetryData> for MetricsData { | |
fn from(telemetry_data: TelemetryData) -> Self { | |
let mut metrics = vec![]; | |
telemetry_data.add_metrics(&mut metrics); | |
Self { metrics } | |
} | |
} | |
trait MetricsProvider { | |
/// Add metrics definitions for this. | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>); | |
} | |
impl MetricsProvider for TelemetryData { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
self.app.add_metrics(metrics); | |
self.collections.add_metrics(metrics); | |
self.cluster.add_metrics(metrics); | |
self.requests.add_metrics(metrics); | |
if let Some(mem) = &self.memory { | |
mem.add_metrics(metrics); | |
} | |
} | |
} | |
impl MetricsProvider for AppBuildTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
metrics.push(metric_family( | |
"app_info", | |
"information about qdrant server", | |
MetricType::GAUGE, | |
vec![gauge( | |
1.0, | |
&[("name", &self.name), ("version", &self.version)], | |
)], | |
)); | |
self.features.iter().for_each(|f| f.add_metrics(metrics)); | |
} | |
} | |
impl MetricsProvider for AppFeaturesTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
metrics.push(metric_family( | |
"app_status_recovery_mode", | |
"features enabled in qdrant server", | |
MetricType::GAUGE, | |
vec![gauge(if self.recovery_mode { 1.0 } else { 0.0 }, &[])], | |
)) | |
} | |
} | |
impl MetricsProvider for CollectionsTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
let vector_count = self | |
.collections | |
.iter() | |
.flatten() | |
.map(|p| match p { | |
CollectionTelemetryEnum::Aggregated(a) => a.vectors, | |
CollectionTelemetryEnum::Full(c) => c.count_vectors(), | |
}) | |
.sum::<usize>(); | |
metrics.push(metric_family( | |
"collections_total", | |
"number of collections", | |
MetricType::GAUGE, | |
vec![gauge(self.number_of_collections as f64, &[])], | |
)); | |
metrics.push(metric_family( | |
"collections_vector_total", | |
"total number of vectors in all collections", | |
MetricType::GAUGE, | |
vec![gauge(vector_count as f64, &[])], | |
)); | |
} | |
} | |
impl MetricsProvider for ClusterTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
let ClusterTelemetry { | |
enabled, | |
status, | |
config: _, | |
peers: _, | |
metadata: _, | |
} = self; | |
metrics.push(metric_family( | |
"cluster_enabled", | |
"is cluster support enabled", | |
MetricType::GAUGE, | |
vec![gauge(if *enabled { 1.0 } else { 0.0 }, &[])], | |
)); | |
if let Some(ref status) = status { | |
status.add_metrics(metrics); | |
} | |
} | |
} | |
impl MetricsProvider for ClusterStatusTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
metrics.push(metric_family( | |
"cluster_peers_total", | |
"total number of cluster peers", | |
MetricType::GAUGE, | |
vec![gauge(self.number_of_peers as f64, &[])], | |
)); | |
metrics.push(metric_family( | |
"cluster_term", | |
"current cluster term", | |
MetricType::COUNTER, | |
vec![counter(self.term as f64, &[])], | |
)); | |
if let Some(ref peer_id) = self.peer_id.map(|p| p.to_string()) { | |
metrics.push(metric_family( | |
"cluster_commit", | |
"index of last committed (finalized) operation cluster peer is aware of", | |
MetricType::COUNTER, | |
vec![counter(self.commit as f64, &[("peer_id", peer_id)])], | |
)); | |
metrics.push(metric_family( | |
"cluster_pending_operations_total", | |
"total number of pending operations for cluster peer", | |
MetricType::GAUGE, | |
vec![gauge(self.pending_operations as f64, &[])], | |
)); | |
metrics.push(metric_family( | |
"cluster_voter", | |
"is cluster peer a voter or learner", | |
MetricType::GAUGE, | |
vec![gauge(if self.is_voter { 1.0 } else { 0.0 }, &[])], | |
)); | |
} | |
} | |
} | |
impl MetricsProvider for RequestsTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
self.rest.add_metrics(metrics); | |
self.grpc.add_metrics(metrics); | |
} | |
} | |
impl MetricsProvider for WebApiTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
let mut builder = OperationDurationMetricsBuilder::default(); | |
for (endpoint, responses) in &self.responses { | |
let Some((method, endpoint)) = endpoint.split_once(' ') else { | |
continue; | |
}; | |
// Endpoint must be whitelisted | |
if REST_ENDPOINT_WHITELIST.binary_search(&endpoint).is_err() { | |
continue; | |
} | |
for (status, stats) in responses { | |
builder.add( | |
stats, | |
&[ | |
("method", method), | |
("endpoint", endpoint), | |
("status", &status.to_string()), | |
], | |
*status == REST_TIMINGS_FOR_STATUS, | |
); | |
} | |
} | |
builder.build("rest", metrics); | |
} | |
} | |
impl MetricsProvider for GrpcTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
let mut builder = OperationDurationMetricsBuilder::default(); | |
for (endpoint, stats) in &self.responses { | |
// Endpoint must be whitelisted | |
if GRPC_ENDPOINT_WHITELIST | |
.binary_search(&endpoint.as_str()) | |
.is_err() | |
{ | |
continue; | |
} | |
builder.add(stats, &[("endpoint", endpoint.as_str())], true); | |
} | |
builder.build("grpc", metrics); | |
} | |
} | |
impl MetricsProvider for MemoryTelemetry { | |
fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) { | |
metrics.push(metric_family( | |
"memory_active_bytes", | |
"Total number of bytes in active pages allocated by the application", | |
MetricType::GAUGE, | |
vec![gauge(self.active_bytes as f64, &[])], | |
)); | |
metrics.push(metric_family( | |
"memory_allocated_bytes", | |
"Total number of bytes allocated by the application", | |
MetricType::GAUGE, | |
vec![gauge(self.allocated_bytes as f64, &[])], | |
)); | |
metrics.push(metric_family( | |
"memory_metadata_bytes", | |
"Total number of bytes dedicated to metadata", | |
MetricType::GAUGE, | |
vec![gauge(self.metadata_bytes as f64, &[])], | |
)); | |
metrics.push(metric_family( | |
"memory_resident_bytes", | |
"Maximum number of bytes in physically resident data pages mapped", | |
MetricType::GAUGE, | |
vec![gauge(self.resident_bytes as f64, &[])], | |
)); | |
metrics.push(metric_family( | |
"memory_retained_bytes", | |
"Total number of bytes in virtual memory mappings", | |
MetricType::GAUGE, | |
vec![gauge(self.retained_bytes as f64, &[])], | |
)); | |
} | |
} | |
/// A helper struct to build a vector of [`MetricFamily`] out of a collection of | |
/// [`OperationDurationStatistics`]. | |
struct OperationDurationMetricsBuilder { | |
total: Vec<Metric>, | |
fail_total: Vec<Metric>, | |
avg_secs: Vec<Metric>, | |
min_secs: Vec<Metric>, | |
max_secs: Vec<Metric>, | |
duration_histogram_secs: Vec<Metric>, | |
} | |
impl OperationDurationMetricsBuilder { | |
/// Add metrics for the provided statistics. | |
/// If `add_timings` is `false`, only the total and fail_total counters will be added. | |
pub fn add( | |
&mut self, | |
stat: &OperationDurationStatistics, | |
labels: &[(&str, &str)], | |
add_timings: bool, | |
) { | |
self.total.push(counter(stat.count as f64, labels)); | |
self.fail_total | |
.push(counter(stat.fail_count as f64, labels)); | |
if !add_timings { | |
return; | |
} | |
self.avg_secs.push(gauge( | |
f64::from(stat.avg_duration_micros.unwrap_or(0.0)) / 1_000_000.0, | |
labels, | |
)); | |
self.min_secs.push(gauge( | |
f64::from(stat.min_duration_micros.unwrap_or(0.0)) / 1_000_000.0, | |
labels, | |
)); | |
self.max_secs.push(gauge( | |
f64::from(stat.max_duration_micros.unwrap_or(0.0)) / 1_000_000.0, | |
labels, | |
)); | |
self.duration_histogram_secs.push(histogram( | |
stat.count as u64, | |
stat.total_duration_micros as f64 / 1_000_000.0, | |
&stat | |
.duration_micros_histogram | |
.iter() | |
.map(|&(b, c)| (f64::from(b) / 1_000_000.0, c as u64)) | |
.collect::<Vec<_>>(), | |
labels, | |
)); | |
} | |
/// Build metrics and add them to the provided vector. | |
pub fn build(self, prefix: &str, metrics: &mut Vec<MetricFamily>) { | |
if !self.total.is_empty() { | |
metrics.push(metric_family( | |
&format!("{prefix}_responses_total"), | |
"total number of responses", | |
MetricType::COUNTER, | |
self.total, | |
)); | |
} | |
if !self.fail_total.is_empty() { | |
metrics.push(metric_family( | |
&format!("{prefix}_responses_fail_total"), | |
"total number of failed responses", | |
MetricType::COUNTER, | |
self.fail_total, | |
)); | |
} | |
if !self.avg_secs.is_empty() { | |
metrics.push(metric_family( | |
&format!("{prefix}_responses_avg_duration_seconds"), | |
"average response duration", | |
MetricType::GAUGE, | |
self.avg_secs, | |
)); | |
} | |
if !self.min_secs.is_empty() { | |
metrics.push(metric_family( | |
&format!("{prefix}_responses_min_duration_seconds"), | |
"minimum response duration", | |
MetricType::GAUGE, | |
self.min_secs, | |
)); | |
} | |
if !self.max_secs.is_empty() { | |
metrics.push(metric_family( | |
&format!("{prefix}_responses_max_duration_seconds"), | |
"maximum response duration", | |
MetricType::GAUGE, | |
self.max_secs, | |
)); | |
} | |
if !self.duration_histogram_secs.is_empty() { | |
metrics.push(metric_family( | |
&format!("{prefix}_responses_duration_seconds"), | |
"response duration histogram", | |
MetricType::HISTOGRAM, | |
self.duration_histogram_secs, | |
)); | |
} | |
} | |
} | |
fn metric_family(name: &str, help: &str, r#type: MetricType, metrics: Vec<Metric>) -> MetricFamily { | |
let mut metric_family = MetricFamily::default(); | |
metric_family.set_name(name.into()); | |
metric_family.set_help(help.into()); | |
metric_family.set_field_type(r#type); | |
metric_family.set_metric(metrics); | |
metric_family | |
} | |
fn counter(value: f64, labels: &[(&str, &str)]) -> Metric { | |
let mut metric = Metric::default(); | |
metric.set_label(labels.iter().map(|(n, v)| label_pair(n, v)).collect()); | |
metric.set_counter({ | |
let mut counter = Counter::default(); | |
counter.set_value(value); | |
counter | |
}); | |
metric | |
} | |
fn gauge(value: f64, labels: &[(&str, &str)]) -> Metric { | |
let mut metric = Metric::default(); | |
metric.set_label(labels.iter().map(|(n, v)| label_pair(n, v)).collect()); | |
metric.set_gauge({ | |
let mut gauge = Gauge::default(); | |
gauge.set_value(value); | |
gauge | |
}); | |
metric | |
} | |
fn histogram( | |
sample_count: u64, | |
sample_sum: f64, | |
buckets: &[(f64, u64)], | |
labels: &[(&str, &str)], | |
) -> Metric { | |
let mut metric = Metric::default(); | |
metric.set_label(labels.iter().map(|(n, v)| label_pair(n, v)).collect()); | |
metric.set_histogram({ | |
let mut histogram = prometheus::proto::Histogram::default(); | |
histogram.set_sample_count(sample_count); | |
histogram.set_sample_sum(sample_sum); | |
histogram.set_bucket( | |
buckets | |
.iter() | |
.map(|&(upper_bound, cumulative_count)| { | |
let mut bucket = prometheus::proto::Bucket::default(); | |
bucket.set_cumulative_count(cumulative_count); | |
bucket.set_upper_bound(upper_bound); | |
bucket | |
}) | |
.collect(), | |
); | |
histogram | |
}); | |
metric | |
} | |
fn label_pair(name: &str, value: &str) -> LabelPair { | |
let mut label = LabelPair::default(); | |
label.set_name(name.into()); | |
label.set_value(value.into()); | |
label | |
} | |
#[cfg(test)]
mod tests {
    use super::{GRPC_ENDPOINT_WHITELIST, REST_ENDPOINT_WHITELIST};

    /// Both whitelists are queried with a binary search, which only gives correct
    /// answers on sorted input. This test guards that ordering.
    #[test]
    fn test_endpoint_whitelists_sorted() {
        assert!(
            REST_ENDPOINT_WHITELIST.windows(2).all(|n| n[0] <= n[1]),
            "REST_ENDPOINT_WHITELIST must be sorted in code to allow binary search"
        );
        assert!(
            GRPC_ENDPOINT_WHITELIST.windows(2).all(|n| n[0] <= n[1]),
            "GRPC_ENDPOINT_WHITELIST must be sorted in code to allow binary search"
        );
    }
}