alamin655 committed on
Commit
d28cbb9
·
unverified ·
2 Parent(s): fc830c4 07bbea8

Merge branch 'rolling' into rolling

Browse files
Cargo.lock CHANGED
@@ -284,6 +284,21 @@ dependencies = [
284
  "memchr",
285
  ]
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  [[package]]
288
  name = "anes"
289
  version = "0.1.6"
@@ -326,6 +341,20 @@ version = "0.10.3"
326
  source = "registry+https://github.com/rust-lang/crates.io-index"
327
  checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341"
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  [[package]]
330
  name = "async-once-cell"
331
  version = "0.5.3"
@@ -437,6 +466,27 @@ dependencies = [
437
  "generic-array",
438
  ]
439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  [[package]]
441
  name = "bstr"
442
  version = "1.7.0"
@@ -502,9 +552,9 @@ dependencies = [
502
 
503
  [[package]]
504
  name = "cargo-platform"
505
- version = "0.1.4"
506
  source = "registry+https://github.com/rust-lang/crates.io-index"
507
- checksum = "12024c4645c97566567129c204f65d5815a8c9aecf30fcbe682b2fe034996d36"
508
  dependencies = [
509
  "serde",
510
  ]
@@ -2840,6 +2890,7 @@ version = "0.11.22"
2840
  source = "registry+https://github.com/rust-lang/crates.io-index"
2841
  checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b"
2842
  dependencies = [
 
2843
  "base64 0.21.5",
2844
  "bytes 1.5.0",
2845
  "encoding_rs",
@@ -2865,6 +2916,7 @@ dependencies = [
2865
  "system-configuration",
2866
  "tokio 1.33.0",
2867
  "tokio-rustls",
 
2868
  "tower-service",
2869
  "url 2.4.1",
2870
  "wasm-bindgen",
@@ -2920,9 +2972,9 @@ dependencies = [
2920
 
2921
  [[package]]
2922
  name = "rustix"
2923
- version = "0.38.21"
2924
  source = "registry+https://github.com/rust-lang/crates.io-index"
2925
- checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3"
2926
  dependencies = [
2927
  "bitflags 2.4.1",
2928
  "errno",
 
284
  "memchr",
285
  ]
286
 
287
+ [[package]]
288
+ name = "alloc-no-stdlib"
289
+ version = "2.0.4"
290
+ source = "registry+https://github.com/rust-lang/crates.io-index"
291
+ checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
292
+
293
+ [[package]]
294
+ name = "alloc-stdlib"
295
+ version = "0.2.2"
296
+ source = "registry+https://github.com/rust-lang/crates.io-index"
297
+ checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
298
+ dependencies = [
299
+ "alloc-no-stdlib",
300
+ ]
301
+
302
  [[package]]
303
  name = "anes"
304
  version = "0.1.6"
 
341
  source = "registry+https://github.com/rust-lang/crates.io-index"
342
  checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341"
343
 
344
+ [[package]]
345
+ name = "async-compression"
346
+ version = "0.4.5"
347
+ source = "registry+https://github.com/rust-lang/crates.io-index"
348
+ checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5"
349
+ dependencies = [
350
+ "brotli",
351
+ "flate2",
352
+ "futures-core",
353
+ "memchr",
354
+ "pin-project-lite",
355
+ "tokio 1.34.0",
356
+ ]
357
+
358
  [[package]]
359
  name = "async-once-cell"
360
  version = "0.5.3"
 
466
  "generic-array",
467
  ]
468
 
469
+ [[package]]
470
+ name = "brotli"
471
+ version = "3.4.0"
472
+ source = "registry+https://github.com/rust-lang/crates.io-index"
473
+ checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
474
+ dependencies = [
475
+ "alloc-no-stdlib",
476
+ "alloc-stdlib",
477
+ "brotli-decompressor",
478
+ ]
479
+
480
+ [[package]]
481
+ name = "brotli-decompressor"
482
+ version = "2.5.1"
483
+ source = "registry+https://github.com/rust-lang/crates.io-index"
484
+ checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
485
+ dependencies = [
486
+ "alloc-no-stdlib",
487
+ "alloc-stdlib",
488
+ ]
489
+
490
  [[package]]
491
  name = "bstr"
492
  version = "1.7.0"
 
552
 
553
  [[package]]
554
  name = "cargo-platform"
555
+ version = "0.1.5"
556
  source = "registry+https://github.com/rust-lang/crates.io-index"
557
+ checksum = "e34637b3140142bdf929fb439e8aa4ebad7651ebf7b1080b3930aa16ac1459ff"
558
  dependencies = [
559
  "serde",
560
  ]
 
2890
  source = "registry+https://github.com/rust-lang/crates.io-index"
2891
  checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b"
2892
  dependencies = [
2893
+ "async-compression",
2894
  "base64 0.21.5",
2895
  "bytes 1.5.0",
2896
  "encoding_rs",
 
2916
  "system-configuration",
2917
  "tokio 1.33.0",
2918
  "tokio-rustls",
2919
+ "tokio-util",
2920
  "tower-service",
2921
  "url 2.4.1",
2922
  "wasm-bindgen",
 
2972
 
2973
  [[package]]
2974
  name = "rustix"
2975
+ version = "0.38.25"
2976
  source = "registry+https://github.com/rust-lang/crates.io-index"
2977
+ checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e"
2978
  dependencies = [
2979
  "bitflags 2.4.1",
2980
  "errno",
Cargo.toml CHANGED
@@ -13,7 +13,7 @@ bench = false
13
  path = "src/bin/websurfx.rs"
14
 
15
  [dependencies]
16
- reqwest = {version="0.11.22", default-features=false, features=["rustls-tls"]}
17
  tokio = {version="1.32.0",features=["rt-multi-thread","macros"], default-features = false}
18
  serde = {version="1.0.190", default-features=false, features=["derive"]}
19
  serde_json = {version="1.0.108", default-features=false}
 
13
  path = "src/bin/websurfx.rs"
14
 
15
  [dependencies]
16
+ reqwest = {version="0.11.22", default-features=false, features=["rustls-tls","brotli", "gzip"]}
17
  tokio = {version="1.32.0",features=["rt-multi-thread","macros"], default-features = false}
18
  serde = {version="1.0.190", default-features=false, features=["derive"]}
19
  serde_json = {version="1.0.108", default-features=false}
src/engines/brave.rs CHANGED
@@ -4,7 +4,7 @@
4
 
5
  use std::collections::HashMap;
6
 
7
- use reqwest::header::HeaderMap;
8
  use scraper::Html;
9
 
10
  use crate::models::aggregation_models::SearchResult;
@@ -42,7 +42,7 @@ impl SearchEngine for Brave {
42
  query: &str,
43
  page: u32,
44
  user_agent: &str,
45
- request_timeout: u8,
46
  safe_search: u8,
47
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
48
  let url = format!("https://search.brave.com/search?q={query}&offset={page}");
@@ -68,7 +68,7 @@ impl SearchEngine for Brave {
68
  .change_context(EngineError::UnexpectedError)?;
69
 
70
  let document: Html = Html::parse_document(
71
- &Brave::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
72
  );
73
 
74
  if let Some(no_result_msg) = self.parser.parse_for_no_results(&document).nth(0) {
 
4
 
5
  use std::collections::HashMap;
6
 
7
+ use reqwest::{header::HeaderMap, Client};
8
  use scraper::Html;
9
 
10
  use crate::models::aggregation_models::SearchResult;
 
42
  query: &str,
43
  page: u32,
44
  user_agent: &str,
45
+ client: &Client,
46
  safe_search: u8,
47
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
48
  let url = format!("https://search.brave.com/search?q={query}&offset={page}");
 
68
  .change_context(EngineError::UnexpectedError)?;
69
 
70
  let document: Html = Html::parse_document(
71
+ &Brave::fetch_html_from_upstream(self, &url, header_map, client).await?,
72
  );
73
 
74
  if let Some(no_result_msg) = self.parser.parse_for_no_results(&document).nth(0) {
src/engines/duckduckgo.rs CHANGED
@@ -5,6 +5,7 @@
5
  use std::collections::HashMap;
6
 
7
  use reqwest::header::HeaderMap;
 
8
  use scraper::Html;
9
 
10
  use crate::models::aggregation_models::SearchResult;
@@ -44,7 +45,7 @@ impl SearchEngine for DuckDuckGo {
44
  query: &str,
45
  page: u32,
46
  user_agent: &str,
47
- request_timeout: u8,
48
  _safe_search: u8,
49
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
50
  // Page number can be missing or empty string and so appropriate handling is required
@@ -76,7 +77,7 @@ impl SearchEngine for DuckDuckGo {
76
  .change_context(EngineError::UnexpectedError)?;
77
 
78
  let document: Html = Html::parse_document(
79
- &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
80
  );
81
 
82
  if self.parser.parse_for_no_results(&document).next().is_some() {
 
5
  use std::collections::HashMap;
6
 
7
  use reqwest::header::HeaderMap;
8
+ use reqwest::Client;
9
  use scraper::Html;
10
 
11
  use crate::models::aggregation_models::SearchResult;
 
45
  query: &str,
46
  page: u32,
47
  user_agent: &str,
48
+ client: &Client,
49
  _safe_search: u8,
50
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
51
  // Page number can be missing or empty string and so appropriate handling is required
 
77
  .change_context(EngineError::UnexpectedError)?;
78
 
79
  let document: Html = Html::parse_document(
80
+ &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, client).await?,
81
  );
82
 
83
  if self.parser.parse_for_no_results(&document).next().is_some() {
src/engines/searx.rs CHANGED
@@ -3,6 +3,7 @@
3
  //! number if provided.
4
 
5
  use reqwest::header::HeaderMap;
 
6
  use scraper::Html;
7
  use std::collections::HashMap;
8
 
@@ -40,7 +41,7 @@ impl SearchEngine for Searx {
40
  query: &str,
41
  page: u32,
42
  user_agent: &str,
43
- request_timeout: u8,
44
  mut safe_search: u8,
45
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
46
  // Page number can be missing or empty string and so appropriate handling is required
@@ -68,7 +69,7 @@ impl SearchEngine for Searx {
68
  .change_context(EngineError::UnexpectedError)?;
69
 
70
  let document: Html = Html::parse_document(
71
- &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
72
  );
73
 
74
  if let Some(no_result_msg) = self.parser.parse_for_no_results(&document).nth(1) {
 
3
  //! number if provided.
4
 
5
  use reqwest::header::HeaderMap;
6
+ use reqwest::Client;
7
  use scraper::Html;
8
  use std::collections::HashMap;
9
 
 
41
  query: &str,
42
  page: u32,
43
  user_agent: &str,
44
+ client: &Client,
45
  mut safe_search: u8,
46
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
47
  // Page number can be missing or empty string and so appropriate handling is required
 
69
  .change_context(EngineError::UnexpectedError)?;
70
 
71
  let document: Html = Html::parse_document(
72
+ &Searx::fetch_html_from_upstream(self, &url, header_map, client).await?,
73
  );
74
 
75
  if let Some(no_result_msg) = self.parser.parse_for_no_results(&document).nth(1) {
src/models/engine_models.rs CHANGED
@@ -3,7 +3,8 @@
3
 
4
  use super::aggregation_models::SearchResult;
5
  use error_stack::{Report, Result, ResultExt};
6
- use std::{collections::HashMap, fmt, time::Duration};
 
7
 
8
  /// A custom error type used for handle engine associated errors.
9
  #[derive(Debug)]
@@ -71,12 +72,11 @@ pub trait SearchEngine: Sync + Send {
71
  &self,
72
  url: &str,
73
  header_map: reqwest::header::HeaderMap,
74
- request_timeout: u8,
75
  ) -> Result<String, EngineError> {
76
  // fetch the html from upstream search engine
77
- Ok(reqwest::Client::new()
78
  .get(url)
79
- .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
80
  .headers(header_map) // add spoofed headers to emulate human behavior
81
  .send()
82
  .await
@@ -109,7 +109,7 @@ pub trait SearchEngine: Sync + Send {
109
  query: &str,
110
  page: u32,
111
  user_agent: &str,
112
- request_timeout: u8,
113
  safe_search: u8,
114
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
115
  }
 
3
 
4
  use super::aggregation_models::SearchResult;
5
  use error_stack::{Report, Result, ResultExt};
6
+ use reqwest::Client;
7
+ use std::{collections::HashMap, fmt};
8
 
9
  /// A custom error type used for handle engine associated errors.
10
  #[derive(Debug)]
 
72
  &self,
73
  url: &str,
74
  header_map: reqwest::header::HeaderMap,
75
+ client: &Client,
76
  ) -> Result<String, EngineError> {
77
  // fetch the html from upstream search engine
78
+ Ok(client
79
  .get(url)
 
80
  .headers(header_map) // add spoofed headers to emulate human behavior
81
  .send()
82
  .await
 
109
  query: &str,
110
  page: u32,
111
  user_agent: &str,
112
+ client: &Client,
113
  safe_search: u8,
114
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
115
  }
src/results/aggregator.rs CHANGED
@@ -9,6 +9,7 @@ use crate::models::{
9
  };
10
  use error_stack::Report;
11
  use regex::Regex;
 
12
  use std::time::{SystemTime, UNIX_EPOCH};
13
  use std::{
14
  collections::HashMap,
@@ -18,6 +19,9 @@ use std::{
18
  use std::{fs::File, io::BufRead};
19
  use tokio::task::JoinHandle;
20
 
 
 
 
21
  /// Aliases for long type annotations
22
  type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
23
 
@@ -68,6 +72,16 @@ pub async fn aggregate(
68
  request_timeout: u8,
69
  safe_search: u8,
70
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
 
 
 
 
 
 
 
 
 
 
71
  let user_agent: &str = random_user_agent();
72
 
73
  // Add a random delay before making the request.
@@ -88,7 +102,7 @@ pub async fn aggregate(
88
  let query: String = query.to_owned();
89
  tasks.push(tokio::spawn(async move {
90
  search_engine
91
- .results(&query, page, user_agent, request_timeout, safe_search)
92
  .await
93
  }));
94
  }
 
9
  };
10
  use error_stack::Report;
11
  use regex::Regex;
12
+ use reqwest::{Client, ClientBuilder};
13
  use std::time::{SystemTime, UNIX_EPOCH};
14
  use std::{
15
  collections::HashMap,
 
19
  use std::{fs::File, io::BufRead};
20
  use tokio::task::JoinHandle;
21
 
22
+ /// A constant for holding the prebuilt Client globally in the app.
23
+ static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
24
+
25
  /// Aliases for long type annotations
26
  type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
27
 
 
72
  request_timeout: u8,
73
  safe_search: u8,
74
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
75
+ let client = CLIENT.get_or_init(|| {
76
+ ClientBuilder::new()
77
+ .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
78
+ .https_only(true)
79
+ .gzip(true)
80
+ .brotli(true)
81
+ .build()
82
+ .unwrap()
83
+ });
84
+
85
  let user_agent: &str = random_user_agent();
86
 
87
  // Add a random delay before making the request.
 
102
  let query: String = query.to_owned();
103
  tasks.push(tokio::spawn(async move {
104
  search_engine
105
+ .results(&query, page, user_agent, client, safe_search)
106
  .await
107
  }));
108
  }