use std::collections::HashMap;
use std::future::Future;
use std::num::NonZeroUsize;
use std::time::Duration;

use rand::{thread_rng, Rng};
use tokio::select;
use tonic::codegen::InterceptedService;
use tonic::service::Interceptor;
use tonic::transport::{Channel, ClientTlsConfig, Error as TonicError, Uri};
use tonic::{Code, Request, Status};

use crate::grpc::dynamic_channel_pool::DynamicChannelPool;
use crate::grpc::dynamic_pool::CountedItem;
use crate::grpc::qdrant::qdrant_client::QdrantClient;
use crate::grpc::qdrant::HealthCheckRequest;

/// Maximum lifetime of a gRPC channel.
///
/// Set to 1 day (24 hours) because the request with the longest timeout currently uses the same
/// value, namely the shard recovery call used in shard snapshot transfers.
pub const MAX_GRPC_CHANNEL_TIMEOUT: Duration = Duration::from_secs(24 * 60 * 60);

pub const DEFAULT_GRPC_TIMEOUT: Duration = Duration::from_secs(60);
pub const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(2);
pub const DEFAULT_POOL_SIZE: usize = 2;

/// Allow a large number of connections per channel, close to the limit of
/// `http2_max_pending_accept_reset_streams` that we configure, to minimize the chance of
/// GOAWAY/ENHANCE_YOUR_CALM errors occurring.
/// More info: <https://github.com/qdrant/qdrant/issues/1907>
const MAX_CONNECTIONS_PER_CHANNEL: usize = 1024;
pub const DEFAULT_RETRIES: usize = 2;
const DEFAULT_BACKOFF: Duration = Duration::from_millis(100);

/// How long to wait for a response from the server before checking the health of the server.
const SMART_CONNECT_INTERVAL: Duration = Duration::from_secs(1);

/// There is no indication that the health-check API is affected by high parallel load,
/// so we can use a small timeout for health checks.
const HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(2);

/// Try to recreate the channel if there were no successful requests within this time.
const CHANNEL_TTL: Duration = Duration::from_secs(5);

#[derive(thiserror::Error, Debug)]
pub enum RequestError<E: std::error::Error> {
    #[error("Error in closure supplied to transport channel pool: {0}")]
    FromClosure(E),
    #[error("Tonic error: {0}")]
    Tonic(#[from] TonicError),
}

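/// How a failed request attempt should be handled, based on the kind of failure observed.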
enum RetryAction {
    Fail(Status),
    RetryOnce(Status),
    RetryWithBackoff(Status),
    RetryImmediately(Status),
}

#[derive(Debug)]
enum HealthCheckError {
    NoChannel,
    ConnectionError(TonicError),
    RequestError(Status),
}

#[derive(Debug)]
enum RequestFailure {
    HealthCheck(HealthCheckError),
    RequestError(Status),
    RequestConnection(TonicError),
}

/// Intercepts gRPC requests and adds a default timeout if it wasn't already set.
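///
/// A small sketch of combining this interceptor with a channel (assuming a connected `channel`
/// is already available; the client type is only an example):
///
/// ```ignore
/// use tonic::codegen::InterceptedService;
///
/// let service = InterceptedService::new(channel, AddTimeout::new(Duration::from_secs(60)));
/// // Every request sent through `service` that does not already carry a `grpc-timeout`
/// // header now gets the 60-second default, e.g. via `QdrantClient::new(service)`.
/// ```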
pub struct AddTimeout {
    default_timeout: Duration,
}

impl AddTimeout {
    pub fn new(default_timeout: Duration) -> Self {
        Self { default_timeout }
    }
}

impl Interceptor for AddTimeout {
    fn call(&mut self, mut request: Request<()>) -> Result<Request<()>, Status> {
        if request.metadata().get("grpc-timeout").is_none() {
            request.set_timeout(self.default_timeout);
        }
        Ok(request)
    }
}

/// Holds a pool of channels established for a set of URIs.
/// Channels are shared by cloning them.
/// Make the `pool_size` larger to increase throughput.
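///
/// A minimal usage sketch (assuming a reachable peer; the URI and timeouts are illustrative):
///
/// ```ignore
/// let pool = TransportChannelPool::new(
///     Duration::from_secs(60), // p2p gRPC timeout
///     Duration::from_secs(2),  // connection timeout
///     2,                       // pool size
///     None,                    // no TLS
/// );
///
/// let uri: Uri = "http://127.0.0.1:6335".parse().unwrap();
/// let reply = pool
///     .with_channel(&uri, |channel| async move {
///         let mut client = QdrantClient::new(channel);
///         client.health_check(HealthCheckRequest {}).await
///     })
///     .await?;
/// ```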
pub struct TransportChannelPool {
    uri_to_pool: tokio::sync::RwLock<HashMap<Uri, DynamicChannelPool>>,
    pool_size: NonZeroUsize,
    grpc_timeout: Duration,
    connection_timeout: Duration,
    tls_config: Option<ClientTlsConfig>,
}

impl Default for TransportChannelPool {
    fn default() -> Self {
        Self {
            uri_to_pool: tokio::sync::RwLock::new(HashMap::new()),
            pool_size: NonZeroUsize::new(DEFAULT_POOL_SIZE).unwrap(),
            grpc_timeout: DEFAULT_GRPC_TIMEOUT,
            connection_timeout: DEFAULT_CONNECT_TIMEOUT,
            tls_config: None,
        }
    }
}

impl TransportChannelPool {
    pub fn new(
        p2p_grpc_timeout: Duration,
        connection_timeout: Duration,
        pool_size: usize,
        tls_config: Option<ClientTlsConfig>,
    ) -> Self {
        Self {
            uri_to_pool: Default::default(),
            grpc_timeout: p2p_grpc_timeout,
            connection_timeout,
            pool_size: NonZeroUsize::new(pool_size).unwrap(),
            tls_config,
        }
    }

    async fn _init_pool_for_uri(&self, uri: Uri) -> Result<DynamicChannelPool, TonicError> {
        DynamicChannelPool::new(
            uri,
            MAX_GRPC_CHANNEL_TIMEOUT,
            self.connection_timeout,
            self.tls_config.clone(),
            MAX_CONNECTIONS_PER_CHANNEL,
            self.pool_size.get(),
        )
        .await
    }

    /// Initialize a pool for the URI and return a clone of the first channel.
    /// Does not fail if the pool already exists.
    async fn init_pool_for_uri(&self, uri: Uri) -> Result<CountedItem<Channel>, TonicError> {
        let mut guard = self.uri_to_pool.write().await;
        match guard.get_mut(&uri) {
            None => {
                let channels = self._init_pool_for_uri(uri.clone()).await?;
                let channel = channels.choose().await?;
                guard.insert(uri, channels);
                Ok(channel)
            }
            Some(channels) => channels.choose().await,
        }
    }

    pub async fn drop_pool(&self, uri: &Uri) {
        let mut guard = self.uri_to_pool.write().await;
        guard.remove(uri);
    }

    pub async fn drop_channel(&self, uri: &Uri, channel: CountedItem<Channel>) {
        let guard = self.uri_to_pool.read().await;
        if let Some(pool) = guard.get(uri) {
            pool.drop_channel(channel);
        }
    }

    async fn get_pooled_channel(
        &self,
        uri: &Uri,
    ) -> Option<Result<CountedItem<Channel>, TonicError>> {
        let guard = self.uri_to_pool.read().await;
        match guard.get(uri) {
            None => None,
            Some(channels) => Some(channels.choose().await),
        }
    }

    async fn get_or_create_pooled_channel(
        &self,
        uri: &Uri,
    ) -> Result<CountedItem<Channel>, TonicError> {
        match self.get_pooled_channel(uri).await {
            None => self.init_pool_for_uri(uri.clone()).await,
            Some(channel) => channel,
        }
    }

    /// Checks if the channel is still alive.
    ///
    /// It uses a duplicate "fast" channel, equivalent to the original but with a smaller timeout.
    /// If it can't get a health-check response within that timeout, it assumes the channel is dead,
    /// and the pool for the URI needs to be dropped and re-created.
    /// For performance reasons, the check only starts after `SMART_CONNECT_INTERVAL`.
    async fn check_connectability(&self, uri: &Uri) -> HealthCheckError {
        loop {
            tokio::time::sleep(SMART_CONNECT_INTERVAL).await;
            let channel = self.get_pooled_channel(uri).await;
            match channel {
                None => return HealthCheckError::NoChannel,
                Some(Err(tonic_error)) => return HealthCheckError::ConnectionError(tonic_error),
                Some(Ok(channel)) => {
                    let mut client = QdrantClient::new(channel.item().clone());

                    let resp: Result<_, Status> = select! {
                        res = client.health_check(HealthCheckRequest {}) => {
                            res
                        }
                        _ = tokio::time::sleep(HEALTH_CHECK_TIMEOUT) => {
                            // Current healthcheck timed out, but maybe there were other requests
                            // that succeeded in a given time window.
                            // If so, we can continue watching.
                            if channel.last_success_age() > HEALTH_CHECK_TIMEOUT {
                                return HealthCheckError::RequestError(Status::deadline_exceeded(format!("Healthcheck timeout {}ms exceeded", HEALTH_CHECK_TIMEOUT.as_millis())))
                            } else {
                                continue;
                            }
                        }
                    };
                    match resp {
                        Ok(_) => {
                            channel.report_success();
                            // continue watching
                        }
                        Err(status) => return HealthCheckError::RequestError(status),
                    }
                }
            }
        }
    }

    async fn make_request<T, O: Future<Output = Result<T, Status>>>(
        &self,
        uri: &Uri,
        f: &impl Fn(InterceptedService<Channel, AddTimeout>) -> O,
        timeout: Duration,
    ) -> Result<T, RequestFailure> {
        let channel = match self.get_or_create_pooled_channel(uri).await {
            Ok(channel) => channel,
            Err(tonic_error) => {
                return Err(RequestFailure::RequestConnection(tonic_error));
            }
        };

        let intercepted_channel =
            InterceptedService::new(channel.item().clone(), AddTimeout::new(timeout));

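        // Race the request itself against the connectability watchdog; whichever branch
        // completes first determines whether the call succeeded or how it failed.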
        let result: RequestFailure = select! {
            res = f(intercepted_channel) => {
                match res {
                    Ok(body) => {
                        channel.report_success();
                        return Ok(body);
                    },
                    Err(err) => RequestFailure::RequestError(err)
                }
            }
            res = self.check_connectability(uri) => {
               RequestFailure::HealthCheck(res)
            }
        };

        // After this point the request is not successful, but we can try to recover
        let last_success_age = channel.last_success_age();
        if last_success_age > CHANNEL_TTL {
            // There were no successful requests for a long time, so we can try to reconnect.
            // It is possible that the server died and changed its IP address.
            self.drop_channel(uri, channel).await;
        } else {
            // We don't need this channel anymore, drop before waiting for the backoff
            drop(channel);
        }

        Err(result)
    }

    /// Allows using a channel to `uri`. If there are no channels to the specified URI, they will
    /// be created.
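    ///
    /// A minimal sketch of a call with an explicit timeout and retry budget (assuming `pool` and
    /// `uri` as in the type-level example; the timeout and retry values are illustrative):
    ///
    /// ```ignore
    /// let reply = pool
    ///     .with_channel_timeout(
    ///         &uri,
    ///         |channel| async move {
    ///             let mut client = QdrantClient::new(channel);
    ///             client.health_check(HealthCheckRequest {}).await
    ///         },
    ///         Some(Duration::from_secs(5)), // request timeout, applied if not already set
    ///         3,                            // retries
    ///     )
    ///     .await?;
    /// ```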
    pub async fn with_channel_timeout<T, O: Future<Output = Result<T, Status>>>(
        &self,
        uri: &Uri,
        f: impl Fn(InterceptedService<Channel, AddTimeout>) -> O,
        timeout: Option<Duration>,
        retries: usize,
    ) -> Result<T, RequestError<Status>> {
        let mut retries_left = retries;
        let mut attempt = 0;
        let max_timeout = timeout.unwrap_or_else(|| self.grpc_timeout + self.connection_timeout);

        loop {
            let request_result: Result<T, _> = self.make_request(uri, &f, max_timeout).await;

            let error_result = match request_result {
                Ok(body) => return Ok(body),
                Err(err) => err,
            };

            let action = match error_result {
                RequestFailure::HealthCheck(healthcheck_error) => {
                    match healthcheck_error {
                        HealthCheckError::NoChannel => {
                            // The channel pool was dropped during the request processing.
                            // Meaning that the peer is not available anymore.
                            // So we can just fail the request.
                            RetryAction::Fail(Status::unavailable(format!(
                                "Peer {uri} is not available"
                            )))
                        }
                        HealthCheckError::ConnectionError(error) => {
                            // Can't establish connection to the server during the healthcheck.
                            // Possible situation:
                            // - Server was killed during the request processing and request timed out.
                            // Actions:
                            // - retry no backoff
                            RetryAction::RetryImmediately(Status::unavailable(format!(
                                "Failed to connect to {uri}, error: {error}"
                            )))
                        }
                        HealthCheckError::RequestError(status) => {
                            // Channel might be unavailable or overloaded.
                            // Or server might be dead.
                            RetryAction::RetryWithBackoff(status)
                        }
                    }
                }
                RequestFailure::RequestError(status) => {
                    match status.code() {
                        Code::Cancelled | Code::Unavailable => {
                            // Possible situations:
                            // - Server is frozen and will never respond.
                            // - Server is overloaded and will respond in the future.
                            RetryAction::RetryWithBackoff(status)
                        }
                        Code::Internal => {
                            // Something is broken, but let's retry anyway, but only once.
                            RetryAction::RetryOnce(status)
                        }
                        _ => {
                            // No special handling, just fail already.
                            RetryAction::Fail(status)
                        }
                    }
                }
                RequestFailure::RequestConnection(error) => {
                    // Can't establish connection to the server during the request.
                    // Possible situation:
                    // - Server is killed
                    // - Server is overloaded
                    // Actions:
                    // - retry with backoff
                    RetryAction::RetryWithBackoff(Status::unavailable(format!(
                        "Failed to connect to {uri}, error: {error}"
                    )))
                }
            };

            let (backoff_time, fallback_status) = match action {
                RetryAction::Fail(err) => return Err(RequestError::FromClosure(err)),
                RetryAction::RetryImmediately(fallback_status) => (Duration::ZERO, fallback_status),
                RetryAction::RetryWithBackoff(fallback_status) => {
                    // Calculate backoff
                    let backoff = DEFAULT_BACKOFF * 2u32.pow(attempt as u32)
                        + Duration::from_millis(thread_rng().gen_range(0..100));
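                    // e.g. attempt 0 → ~100ms, attempt 1 → ~200ms, attempt 2 → ~400ms,
                    // plus up to 100ms of random jitter to avoid synchronized retries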

                    if backoff > max_timeout {
                        // We can't wait for the request any longer, return the error as is
                        return Err(RequestError::FromClosure(fallback_status));
                    }
                    (backoff, fallback_status)
                }
                RetryAction::RetryOnce(fallback_status) => {
                    if retries_left > 1 {
                        retries_left = 1;
                    }
                    (Duration::ZERO, fallback_status)
                }
            };

            attempt += 1;
            if retries_left == 0 {
                return Err(RequestError::FromClosure(fallback_status));
            }
            retries_left = retries_left.saturating_sub(1);

            // Wait for the backoff
            tokio::time::sleep(backoff_time).await;
        }
    }

    /// Allows using a channel to `uri` with the default timeout and retry count. If there are no
    /// channels to the specified URI, they will be created.
    pub async fn with_channel<T, O: Future<Output = Result<T, Status>>>(
        &self,
        uri: &Uri,
        f: impl Fn(InterceptedService<Channel, AddTimeout>) -> O,
    ) -> Result<T, RequestError<Status>> {
        self.with_channel_timeout(uri, f, None, DEFAULT_RETRIES)
            .await
    }
}