Spaces:
Running
Running
Łukasz Mariański
neon_arch
committed on
✨ Option to use a proxy for outgoing `upstream search engine` requests (#573)
Browse files
* Add option to use a proxy for outgoing (search engine) requests.
* Enable socks feature in reqwest
* Fix formatting
* add proxy feature
* Update src/config/parser.rs
Co-authored-by: neon_arch <[email protected]>
* Update websurfx/config.lua
Co-authored-by: neon_arch <[email protected]>
* Update Cargo.toml
Co-authored-by: neon_arch <[email protected]>
* fix
* Update Cargo.toml
Co-authored-by: neon_arch <[email protected]>
---------
Co-authored-by: neon_arch <[email protected]>
- Cargo.toml +2 -1
- src/config/parser.rs +12 -0
- src/results/aggregator.rs +10 -4
- websurfx/config.lua +2 -0
Cargo.toml
CHANGED
@@ -17,7 +17,8 @@ reqwest = { version = "0.12.5", default-features = false, features = [
|
|
17 |
"rustls-tls",
|
18 |
"brotli",
|
19 |
"gzip",
|
20 |
-
"http2"
|
|
|
21 |
] }
|
22 |
tokio = { version = "1.32.0", features = [
|
23 |
"rt-multi-thread",
|
|
|
17 |
"rustls-tls",
|
18 |
"brotli",
|
19 |
"gzip",
|
20 |
+
"http2",
|
21 |
+
"socks",
|
22 |
] }
|
23 |
tokio = { version = "1.32.0", features = [
|
24 |
"rt-multi-thread",
|
src/config/parser.rs
CHANGED
@@ -6,6 +6,7 @@ use crate::handler::{file_path, FileType};
|
|
6 |
use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
|
7 |
use log::LevelFilter;
|
8 |
use mlua::Lua;
|
|
|
9 |
use std::{collections::HashMap, fs, thread::available_parallelism};
|
10 |
|
11 |
/// A named struct which stores the parsed config file options.
|
@@ -48,6 +49,8 @@ pub struct Config {
|
|
48 |
pub tcp_connection_keep_alive: u8,
|
49 |
/// It stores the pool idle connection timeout in seconds.
|
50 |
pub pool_idle_connection_timeout: u8,
|
|
|
|
|
51 |
/// It stores the number of https connections to keep in the pool.
|
52 |
pub number_of_https_connections: u8,
|
53 |
}
|
@@ -120,6 +123,14 @@ impl Config {
|
|
120 |
_ => parsed_cet,
|
121 |
};
|
122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
Ok(Config {
|
124 |
port: globals.get::<_, u16>("port")?,
|
125 |
binding_ip: globals.get::<_, String>("binding_ip")?,
|
@@ -151,6 +162,7 @@ impl Config {
|
|
151 |
safe_search,
|
152 |
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
|
153 |
cache_expiry_time,
|
|
|
154 |
})
|
155 |
}
|
156 |
}
|
|
|
6 |
use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
|
7 |
use log::LevelFilter;
|
8 |
use mlua::Lua;
|
9 |
+
use reqwest::Proxy;
|
10 |
use std::{collections::HashMap, fs, thread::available_parallelism};
|
11 |
|
12 |
/// A named struct which stores the parsed config file options.
|
|
|
49 |
pub tcp_connection_keep_alive: u8,
|
50 |
/// It stores the pool idle connection timeout in seconds.
|
51 |
pub pool_idle_connection_timeout: u8,
|
52 |
+
/// Url of the proxy to use for outgoing requests.
|
53 |
+
pub proxy: Option<Proxy>,
|
54 |
/// It stores the number of https connections to keep in the pool.
|
55 |
pub number_of_https_connections: u8,
|
56 |
}
|
|
|
123 |
_ => parsed_cet,
|
124 |
};
|
125 |
|
126 |
+
let proxy_opt = globals.get::<_, Option<String>>("proxy")?;
|
127 |
+
let proxy = proxy_opt.and_then(|proxy_str| {
|
128 |
+
Proxy::all(proxy_str).ok().and_then(|_| {
|
129 |
+
log::error!("Invalid proxy url, defaulting to no proxy.");
|
130 |
+
None
|
131 |
+
})
|
132 |
+
});
|
133 |
+
|
134 |
Ok(Config {
|
135 |
port: globals.get::<_, u16>("port")?,
|
136 |
binding_ip: globals.get::<_, String>("binding_ip")?,
|
|
|
162 |
safe_search,
|
163 |
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
|
164 |
cache_expiry_time,
|
165 |
+
proxy,
|
166 |
})
|
167 |
}
|
168 |
}
|
src/results/aggregator.rs
CHANGED
@@ -75,7 +75,7 @@ pub async fn aggregate(
|
|
75 |
safe_search: u8,
|
76 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
77 |
let client = CLIENT.get_or_init(|| {
|
78 |
-
ClientBuilder::new()
|
79 |
.timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
80 |
.pool_idle_timeout(Duration::from_secs(
|
81 |
config.pool_idle_connection_timeout as u64,
|
@@ -86,9 +86,13 @@ pub async fn aggregate(
|
|
86 |
.https_only(true)
|
87 |
.gzip(true)
|
88 |
.brotli(true)
|
89 |
-
.http2_adaptive_window(config.adaptive_window)
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
92 |
});
|
93 |
|
94 |
let user_agent: &str = random_user_agent();
|
@@ -242,6 +246,7 @@ pub async fn filter_with_lists(
|
|
242 |
|
243 |
Ok(())
|
244 |
}
|
|
|
245 |
/// Sorts SearchResults by relevance score.
|
246 |
/// <br> sort_unstable is used as its faster,stability is not an issue on our side.
|
247 |
/// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
|
@@ -257,6 +262,7 @@ fn sort_search_results(results: &mut [SearchResult]) {
|
|
257 |
.unwrap_or(Ordering::Less)
|
258 |
})
|
259 |
}
|
|
|
260 |
#[cfg(test)]
|
261 |
mod tests {
|
262 |
use super::*;
|
|
|
75 |
safe_search: u8,
|
76 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
77 |
let client = CLIENT.get_or_init(|| {
|
78 |
+
let mut cb = ClientBuilder::new()
|
79 |
.timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
80 |
.pool_idle_timeout(Duration::from_secs(
|
81 |
config.pool_idle_connection_timeout as u64,
|
|
|
86 |
.https_only(true)
|
87 |
.gzip(true)
|
88 |
.brotli(true)
|
89 |
+
.http2_adaptive_window(config.adaptive_window);
|
90 |
+
|
91 |
+
if config.proxy.is_some() {
|
92 |
+
cb = cb.proxy(config.proxy.clone().unwrap());
|
93 |
+
}
|
94 |
+
|
95 |
+
cb.build().unwrap()
|
96 |
});
|
97 |
|
98 |
let user_agent: &str = random_user_agent();
|
|
|
246 |
|
247 |
Ok(())
|
248 |
}
|
249 |
+
|
250 |
/// Sorts SearchResults by relevance score.
|
251 |
/// <br> sort_unstable is used as its faster,stability is not an issue on our side.
|
252 |
/// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
|
|
|
262 |
.unwrap_or(Ordering::Less)
|
263 |
})
|
264 |
}
|
265 |
+
|
266 |
#[cfg(test)]
|
267 |
mod tests {
|
268 |
use super::*;
|
websurfx/config.lua
CHANGED
@@ -75,3 +75,5 @@ upstream_search_engines = {
|
|
75 |
Mojeek = false,
|
76 |
Bing = false,
|
77 |
} -- select the upstream search engines from which the results should be fetched.
|
|
|
|
|
|
75 |
Mojeek = false,
|
76 |
Bing = false,
|
77 |
} -- select the upstream search engines from which the results should be fetched.
|
78 |
+
|
79 |
+
proxy = nil -- Proxy to send outgoing requests through. Set to nil to disable.
|