Spaces:
Running
Running
neon_arch
committed on
Commit
·
b42adaa
1
Parent(s):
05bf05b
⚡️ perf: rewrite the code by using a constant storing a prebuilt client globally for each thread (#384)
Browse files
- src/models/engine_models.rs +5 -5
- src/results/aggregator.rs +15 -1
src/models/engine_models.rs
CHANGED
@@ -3,7 +3,8 @@
|
|
3 |
|
4 |
use super::aggregation_models::SearchResult;
|
5 |
use error_stack::{Report, Result, ResultExt};
|
6 |
-
use
|
|
|
7 |
|
8 |
/// A custom error type used for handle engine associated errors.
|
9 |
#[derive(Debug)]
|
@@ -71,12 +72,11 @@ pub trait SearchEngine: Sync + Send {
|
|
71 |
&self,
|
72 |
url: &str,
|
73 |
header_map: reqwest::header::HeaderMap,
|
74 |
-
|
75 |
) -> Result<String, EngineError> {
|
76 |
// fetch the html from upstream search engine
|
77 |
-
Ok(
|
78 |
.get(url)
|
79 |
-
.timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
80 |
.headers(header_map) // add spoofed headers to emulate human behavior
|
81 |
.send()
|
82 |
.await
|
@@ -109,7 +109,7 @@ pub trait SearchEngine: Sync + Send {
|
|
109 |
query: &str,
|
110 |
page: u32,
|
111 |
user_agent: &str,
|
112 |
-
|
113 |
safe_search: u8,
|
114 |
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
115 |
}
|
|
|
3 |
|
4 |
use super::aggregation_models::SearchResult;
|
5 |
use error_stack::{Report, Result, ResultExt};
|
6 |
+
use reqwest::Client;
|
7 |
+
use std::{collections::HashMap, fmt};
|
8 |
|
9 |
/// A custom error type used for handle engine associated errors.
|
10 |
#[derive(Debug)]
|
|
|
72 |
&self,
|
73 |
url: &str,
|
74 |
header_map: reqwest::header::HeaderMap,
|
75 |
+
client: &Client,
|
76 |
) -> Result<String, EngineError> {
|
77 |
// fetch the html from upstream search engine
|
78 |
+
Ok(client
|
79 |
.get(url)
|
|
|
80 |
.headers(header_map) // add spoofed headers to emulate human behavior
|
81 |
.send()
|
82 |
.await
|
|
|
109 |
query: &str,
|
110 |
page: u32,
|
111 |
user_agent: &str,
|
112 |
+
client: &Client,
|
113 |
safe_search: u8,
|
114 |
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
115 |
}
|
src/results/aggregator.rs
CHANGED
@@ -9,6 +9,7 @@ use crate::models::{
|
|
9 |
};
|
10 |
use error_stack::Report;
|
11 |
use regex::Regex;
|
|
|
12 |
use std::time::{SystemTime, UNIX_EPOCH};
|
13 |
use std::{
|
14 |
collections::HashMap,
|
@@ -18,6 +19,9 @@ use std::{
|
|
18 |
use std::{fs::File, io::BufRead};
|
19 |
use tokio::task::JoinHandle;
|
20 |
|
|
|
|
|
|
|
21 |
/// Aliases for long type annotations
|
22 |
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
|
23 |
|
@@ -68,6 +72,16 @@ pub async fn aggregate(
|
|
68 |
request_timeout: u8,
|
69 |
safe_search: u8,
|
70 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
let user_agent: &str = random_user_agent();
|
72 |
|
73 |
// Add a random delay before making the request.
|
@@ -88,7 +102,7 @@ pub async fn aggregate(
|
|
88 |
let query: String = query.to_owned();
|
89 |
tasks.push(tokio::spawn(async move {
|
90 |
search_engine
|
91 |
-
.results(&query, page, user_agent,
|
92 |
.await
|
93 |
}));
|
94 |
}
|
|
|
9 |
};
|
10 |
use error_stack::Report;
|
11 |
use regex::Regex;
|
12 |
+
use reqwest::{Client, ClientBuilder};
|
13 |
use std::time::{SystemTime, UNIX_EPOCH};
|
14 |
use std::{
|
15 |
collections::HashMap,
|
|
|
19 |
use std::{fs::File, io::BufRead};
|
20 |
use tokio::task::JoinHandle;
|
21 |
|
22 |
+
/// A constant for holding the prebuilt Client globally in the app.
|
23 |
+
static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
|
24 |
+
|
25 |
/// Aliases for long type annotations
|
26 |
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
|
27 |
|
|
|
72 |
request_timeout: u8,
|
73 |
safe_search: u8,
|
74 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
75 |
+
let client = CLIENT.get_or_init(|| {
|
76 |
+
ClientBuilder::new()
|
77 |
+
.timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
78 |
+
.https_only(true)
|
79 |
+
.gzip(true)
|
80 |
+
.brotli(true)
|
81 |
+
.build()
|
82 |
+
.unwrap()
|
83 |
+
});
|
84 |
+
|
85 |
let user_agent: &str = random_user_agent();
|
86 |
|
87 |
// Add a random delay before making the request.
|
|
|
102 |
let query: String = query.to_owned();
|
103 |
tasks.push(tokio::spawn(async move {
|
104 |
search_engine
|
105 |
+
.results(&query, page, user_agent, client, safe_search)
|
106 |
.await
|
107 |
}));
|
108 |
}
|