neon_arch committed on
Commit
b42adaa
·
1 Parent(s): 05bf05b

⚡️ perf: rewrite the code by using a constant storing a prebuilt client globally for each thread (#384)

Browse files
src/models/engine_models.rs CHANGED
@@ -3,7 +3,8 @@
3
 
4
  use super::aggregation_models::SearchResult;
5
  use error_stack::{Report, Result, ResultExt};
6
- use std::{collections::HashMap, fmt, time::Duration};
 
7
 
8
  /// A custom error type used for handling engine-associated errors.
9
  #[derive(Debug)]
@@ -71,12 +72,11 @@ pub trait SearchEngine: Sync + Send {
71
  &self,
72
  url: &str,
73
  header_map: reqwest::header::HeaderMap,
74
- request_timeout: u8,
75
  ) -> Result<String, EngineError> {
76
  // fetch the html from upstream search engine
77
- Ok(reqwest::Client::new()
78
  .get(url)
79
- .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
80
  .headers(header_map) // add spoofed headers to emulate human behavior
81
  .send()
82
  .await
@@ -109,7 +109,7 @@ pub trait SearchEngine: Sync + Send {
109
  query: &str,
110
  page: u32,
111
  user_agent: &str,
112
- request_timeout: u8,
113
  safe_search: u8,
114
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
115
  }
 
3
 
4
  use super::aggregation_models::SearchResult;
5
  use error_stack::{Report, Result, ResultExt};
6
+ use reqwest::Client;
7
+ use std::{collections::HashMap, fmt};
8
 
9
  /// A custom error type used for handling engine-associated errors.
10
  #[derive(Debug)]
 
72
  &self,
73
  url: &str,
74
  header_map: reqwest::header::HeaderMap,
75
+ client: &Client,
76
  ) -> Result<String, EngineError> {
77
  // fetch the html from upstream search engine
78
+ Ok(client
79
  .get(url)
 
80
  .headers(header_map) // add spoofed headers to emulate human behavior
81
  .send()
82
  .await
 
109
  query: &str,
110
  page: u32,
111
  user_agent: &str,
112
+ client: &Client,
113
  safe_search: u8,
114
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
115
  }
src/results/aggregator.rs CHANGED
@@ -9,6 +9,7 @@ use crate::models::{
9
  };
10
  use error_stack::Report;
11
  use regex::Regex;
 
12
  use std::time::{SystemTime, UNIX_EPOCH};
13
  use std::{
14
  collections::HashMap,
@@ -18,6 +19,9 @@ use std::{
18
  use std::{fs::File, io::BufRead};
19
  use tokio::task::JoinHandle;
20
 
 
 
 
21
  /// Aliases for long type annotations
22
  type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
23
 
@@ -68,6 +72,16 @@ pub async fn aggregate(
68
  request_timeout: u8,
69
  safe_search: u8,
70
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
 
 
 
 
 
 
 
 
 
 
71
  let user_agent: &str = random_user_agent();
72
 
73
  // Add a random delay before making the request.
@@ -88,7 +102,7 @@ pub async fn aggregate(
88
  let query: String = query.to_owned();
89
  tasks.push(tokio::spawn(async move {
90
  search_engine
91
- .results(&query, page, user_agent, request_timeout, safe_search)
92
  .await
93
  }));
94
  }
 
9
  };
10
  use error_stack::Report;
11
  use regex::Regex;
12
+ use reqwest::{Client, ClientBuilder};
13
  use std::time::{SystemTime, UNIX_EPOCH};
14
  use std::{
15
  collections::HashMap,
 
19
  use std::{fs::File, io::BufRead};
20
  use tokio::task::JoinHandle;
21
 
22
+ /// A static holding the prebuilt `Client`, initialized once and shared globally across the app.
23
+ static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
24
+
25
  /// Aliases for long type annotations
26
  type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
27
 
 
72
  request_timeout: u8,
73
  safe_search: u8,
74
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
75
+ let client = CLIENT.get_or_init(|| {
76
+ ClientBuilder::new()
77
+ .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
78
+ .https_only(true)
79
+ .gzip(true)
80
+ .brotli(true)
81
+ .build()
82
+ .unwrap()
83
+ });
84
+
85
  let user_agent: &str = random_user_agent();
86
 
87
  // Add a random delay before making the request.
 
102
  let query: String = query.to_owned();
103
  tasks.push(tokio::spawn(async move {
104
  search_engine
105
+ .results(&query, page, user_agent, client, safe_search)
106
  .await
107
  }));
108
  }