Łukasz Mariański, neon_arch committed
Commit c796ae8 · unverified · 1 Parent(s): 913ca1b

✨ Option to use a proxy for outgoing `upstream search engine` requests (#573)


* Add option to use a proxy for outgoing (search engine) requests.

* Enable socks feature in reqwest

* Fix formatting

* add proxy feature

* Update src/config/parser.rs

Co-authored-by: neon_arch <[email protected]>

* Update websurfx/config.lua

Co-authored-by: neon_arch <[email protected]>

* Update Cargo.toml

Co-authored-by: neon_arch <[email protected]>

* fix

* Update Cargo.toml

Co-authored-by: neon_arch <[email protected]>

---------

Co-authored-by: neon_arch <[email protected]>

Cargo.toml CHANGED
@@ -17,7 +17,8 @@ reqwest = { version = "0.12.5", default-features = false, features = [
     "rustls-tls",
     "brotli",
     "gzip",
-    "http2"
+    "http2",
+    "socks",
 ] }
 tokio = { version = "1.32.0", features = [
     "rt-multi-thread",
src/config/parser.rs CHANGED
@@ -6,6 +6,7 @@ use crate::handler::{file_path, FileType};
 use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
+use reqwest::Proxy;
 use std::{collections::HashMap, fs, thread::available_parallelism};

 /// A named struct which stores the parsed config file options.
@@ -48,6 +49,8 @@ pub struct Config {
     pub tcp_connection_keep_alive: u8,
     /// It stores the pool idle connection timeout in seconds.
     pub pool_idle_connection_timeout: u8,
+    /// Url of the proxy to use for outgoing requests.
+    pub proxy: Option<Proxy>,
     /// It stores the number of https connections to keep in the pool.
     pub number_of_https_connections: u8,
 }
@@ -120,6 +123,14 @@ impl Config {
             _ => parsed_cet,
         };

+        let proxy_opt = globals.get::<_, Option<String>>("proxy")?;
+        let proxy = proxy_opt.and_then(|proxy_str| {
+            Proxy::all(proxy_str).ok().and_then(|_| {
+                log::error!("Invalid proxy url, defaulting to no proxy.");
+                None
+            })
+        });
+
         Ok(Config {
             port: globals.get::<_, u16>("port")?,
             binding_ip: globals.get::<_, String>("binding_ip")?,
@@ -151,6 +162,7 @@ impl Config {
             safe_search,
             #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
             cache_expiry_time,
+            proxy,
         })
     }
 }
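One thing worth noting about the last hunk: as committed, `Proxy::all(proxy_str).ok().and_then(|_| { …; None })` logs "Invalid proxy url" and yields `None` precisely when the URL parses successfully, so the parsed proxy is never kept. A minimal sketch of what the parsing step presumably intends (the free function `parse_proxy` is purely illustrative, not part of the codebase):

use reqwest::Proxy;

/// Illustrative sketch: keep the configured proxy when reqwest accepts the URL,
/// otherwise log the problem and fall back to no proxy.
fn parse_proxy(proxy_opt: Option<String>) -> Option<Proxy> {
    proxy_opt.and_then(|proxy_str| match Proxy::all(proxy_str) {
        Ok(proxy) => Some(proxy),
        Err(err) => {
            log::error!("Invalid proxy url ({}), defaulting to no proxy.", err);
            None
        }
    })
}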
src/results/aggregator.rs CHANGED
@@ -75,7 +75,7 @@ pub async fn aggregate(
     safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let client = CLIENT.get_or_init(|| {
-        ClientBuilder::new()
+        let mut cb = ClientBuilder::new()
             .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
             .pool_idle_timeout(Duration::from_secs(
                 config.pool_idle_connection_timeout as u64,
@@ -86,9 +86,13 @@ pub async fn aggregate(
             .https_only(true)
             .gzip(true)
             .brotli(true)
-            .http2_adaptive_window(config.adaptive_window)
-            .build()
-            .unwrap()
+            .http2_adaptive_window(config.adaptive_window);
+
+        if config.proxy.is_some() {
+            cb = cb.proxy(config.proxy.clone().unwrap());
+        }
+
+        cb.build().unwrap()
     });

     let user_agent: &str = random_user_agent();
@@ -242,6 +246,7 @@ pub async fn filter_with_lists(

     Ok(())
 }
+
 /// Sorts SearchResults by relevance score.
 /// <br> sort_unstable is used as its faster,stability is not an issue on our side.
 /// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
@@ -257,6 +262,7 @@ fn sort_search_results(results: &mut [SearchResult]) {
             .unwrap_or(Ordering::Less)
     })
 }
+
 #[cfg(test)]
 mod tests {
     use super::*;
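The builder change above keeps the single shared `CLIENT` but only attaches a proxy when one was parsed from the config. A compact sketch of the same pattern, assuming a `reqwest::Proxy` produced by the config step (an `if let` avoids the `is_some()`/`unwrap()` pair used in the committed hunk):

use std::time::Duration;

use reqwest::{Client, ClientBuilder, Proxy};

/// Illustrative sketch: build the outgoing-request client, routing it through
/// `proxy` only when one was configured.
fn build_client(proxy: Option<Proxy>, request_timeout: u64) -> reqwest::Result<Client> {
    let mut cb = ClientBuilder::new()
        .timeout(Duration::from_secs(request_timeout))
        .https_only(true);

    if let Some(proxy) = proxy {
        cb = cb.proxy(proxy);
    }

    cb.build()
}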
websurfx/config.lua CHANGED
@@ -75,3 +75,5 @@ upstream_search_engines = {
     Mojeek = false,
     Bing = false,
 } -- select the upstream search engines from which the results should be fetched.
+
+proxy = nil -- Proxy to send outgoing requests through. Set to nil to disable.
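With the `socks` feature enabled in Cargo.toml, a value such as `proxy = "socks5://127.0.0.1:1080"` (a hypothetical local SOCKS5 listener) is intended to route all outgoing upstream search engine requests through that proxy, while leaving it as `nil` keeps the previous behaviour of connecting directly.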