Merge remote-tracking branch 'upstream/rolling' into hf-rolling
Files changed:
- .github/workflows/release.yml +1 -1
- .mergify.yml +12 -7
- Cargo.lock +0 -0
- Cargo.toml +86 -35
- dev.Dockerfile +1 -1
- flake.lock +3 -3
- flake.nix +1 -1
- public/images/close.svg +1 -0
- public/static/index.js +4 -32
- public/static/pagination.js +0 -39
- public/static/search_area_options.js +0 -18
- public/static/themes/simple.css +53 -15
- src/cache/cacher.rs +31 -18
- src/cache/redis_cacher.rs +15 -6
- src/config/parser.rs +42 -21
- src/engines/bing.rs +6 -8
- src/engines/brave.rs +4 -4
- src/engines/common.rs +23 -0
- src/engines/duckduckgo.rs +4 -4
- src/engines/librex.rs +5 -5
- src/engines/mod.rs +2 -0
- src/engines/mojeek.rs +10 -15
- src/engines/searx.rs +4 -4
- src/engines/startpage.rs +4 -4
- src/engines/wikipedia.rs +101 -0
- src/lib.rs +7 -2
- src/models/aggregation_models.rs +56 -32
- src/models/engine_models.rs +4 -0
- src/models/parser_models.rs +2 -2
- src/models/server_models.rs +14 -4
- src/results/aggregator.rs +30 -28
- src/server/routes/export_import.rs +194 -0
- src/server/routes/mod.rs +1 -0
- src/server/routes/search.rs +30 -30
- src/templates/partials/bar.rs +6 -2
- src/templates/partials/search_bar.rs +6 -5
- src/templates/partials/settings_tabs/engines.rs +1 -1
- src/templates/partials/settings_tabs/general.rs +15 -0
- src/templates/partials/settings_tabs/user_interface.rs +2 -2
- src/templates/views/search.rs +4 -4
- websurfx/config.lua +10 -1
.github/workflows/release.yml
CHANGED
@@ -61,7 +61,7 @@ jobs:
       env:
         GH_TOKEN: ${{ secrets.ADMIN_RIGHTS_TOKEN }}
     - name: Create Release
-      uses: softprops/action-gh-release@…
+      uses: softprops/action-gh-release@v2
      with:
        token: ${{ secrets.ADMIN_RIGHTS_TOKEN }}
        generate_release_notes: true
.mergify.yml
CHANGED
@@ -1,13 +1,14 @@
-…
-  - name: …
-    …
+queue_rules:
+  - name: default
+    queue_conditions:
       - "#approved-reviews-by>=2"
       - check-success=build (stable)
       - check-success=CodeFactor
       - check-success=Rust project
-    …
-    …
-    …
+    merge_conditions: []
+    merge_method: squash
+
+pull_request_rules:
   - name: automatic update of pull requests where more 5 commits behind
     conditions:
       - "#commits-behind>5"
@@ -17,4 +18,8 @@ pull_request_rules:
     conditions:
       - merged
     actions:
-      delete_head_branch: {}
+      delete_head_branch: {}
+  - name: Automatic merge on approval
+    conditions: []
+    actions:
+      queue:
Cargo.lock
CHANGED
The diff for this file is too large to render.
Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "1.…
+version = "1.21.0"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
@@ -13,24 +13,27 @@ bench = false
 path = "src/bin/websurfx.rs"

 [dependencies]
-reqwest = { version = "0.…
+reqwest = { version = "0.12.5", default-features = false, features = [
     "rustls-tls",
     "brotli",
     "gzip",
+    "http2",
+    "socks",
 ] }
-tokio = { version = "1.…
+tokio = { version = "1.43.0", features = [
     "rt-multi-thread",
     "macros",
     "fs",
     "io-util",
 ], default-features = false }
-serde = { version = "1.0.…
-serde_json = { version = "1.0.…
-maud = { version = "…
+serde = { version = "1.0.215", default-features = false, features = ["derive"] }
+serde_json = { version = "1.0.138", default-features = false }
+bincode = {version="1.3.3", default-features=false}
+maud = { version = "0.26.0", default-features = false, features = [
     "actix-web",
 ] }
-scraper = { version = "0.…
-actix-web = { version = "4.…
+scraper = { version = "0.21.0", default-features = false }
+actix-web = { version = "4.9.0", features = [
     "cookies",
     "macros",
     "compress-brotli",
@@ -38,35 +41,32 @@ actix-web = { version = "4.4.0", features = [
 actix-files = { version = "0.6.5", default-features = false }
 actix-cors = { version = "0.7.0", default-features = false }
 fake-useragent = { version = "0.1.3", default-features = false }
-env_logger = { version = "0.11.…
+env_logger = { version = "0.11.6", default-features = false }
 log = { version = "0.4.21", default-features = false }
-mlua = { version = "0.…
+mlua = { version = "0.10.3", features = [
     "luajit",
     "vendored",
 ], default-features = false }
-redis = { version = "0.…
+redis = { version = "0.28.2", features = [
     "tokio-comp",
     "connection-manager",
+    "tcp_nodelay"
 ], default-features = false, optional = true }
-blake3 = { version = "1.5.…
+blake3 = { version = "1.5.4", default-features = false }
 error-stack = { version = "0.4.0", default-features = false, features = [
     "std",
 ] }
-async-trait = { version = "0.1.…
-regex = { version = "1.…
-…
-    "union",
-    "serde",
-], default-features = false }
-futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
+async-trait = { version = "0.1.86", default-features = false }
+regex = { version = "1.11.1", features = ["perf"], default-features = false }
+futures = { version = "0.3.31", default-features = false, features = ["alloc"] }
 dhat = { version = "0.3.2", optional = true, default-features = false }
-mimalloc = { version = "0.1.…
-async-once-cell = { version = "0.5.…
-actix-governor = { version = "0.…
-…
-    "…
+mimalloc = { version = "0.1.43", default-features = false }
+async-once-cell = { version = "0.5.4", default-features = false }
+actix-governor = { version = "0.8.0", default-features = false }
+moka = { version = "0.12.8", optional = true, default-features = false, features = [
+    "future",
 ] }
-async-compression = { version = "0.4.…
+async-compression = { version = "0.4.12", default-features = false, features = [
     "brotli",
     "tokio",
 ], optional = true }
@@ -75,31 +75,38 @@ chacha20poly1305 = { version = "0.10.1", default-features = false, features = [
     "getrandom",
 ], optional = true }
 chacha20 = { version = "0.9.1", default-features = false, optional = true }
-base64 = { version = "0.…
+base64 = { version = "0.22.1", default-features = false, features = [
     "std",
 ], optional = true }
 cfg-if = { version = "1.0.0", default-features = false, optional = true }
-keyword_extraction = { version = "1.…
+keyword_extraction = { version = "1.5.0", default-features = false, features = [
     "tf_idf",
-…
-…
+    "rayon",
 ] }
-…
 stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
 thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
     "moby",
-]…
+]}
+
+actix-multipart = { version = "0.7.2", default-features = false, features = [
+    "derive",
+    "tempfile",
+]}
+itertools = {version = "0.14.0", default-features = false}

 [dev-dependencies]
 rusty-hook = { version = "^0.11.2", default-features = false }
 criterion = { version = "0.5.1", default-features = false }
-tempfile = { version = "3.…
+tempfile = { version = "3.14.0", default-features = false }

 [build-dependencies]
-lightningcss = { version = "1.0.0-alpha.…
+lightningcss = { version = "1.0.0-alpha.57", default-features = false, features = [
     "grid",
 ] }
-…
+# Disabled until bug fixing update
+# minify-js = { version = "0.6.0", default-features = false }
+# Temporary fork with fix
+minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}

 [profile.dev]
 opt-level = 0
@@ -127,11 +134,55 @@ codegen-units = 1
 rpath = false
 strip = "symbols"

+[profile.bsr1]
+inherits = "release"
+opt-level = "s"
+
+[profile.bsr2]
+inherits = "bsr1"
+opt-level = "z"
+
+[profile.lpcb1]
+inherits = "release"
+codegen-units = 16
+
+[profile.lpcb2]
+inherits = "lpcb1"
+lto = "off"
+
+[profile.lpcb3]
+inherits = "lpcb2"
+opt-level = 2
+
+[profile.bsr_and_lpcb1]
+inherits = "lpcb1"
+opt-level = "s"
+
+[profile.bsr_and_lpcb2]
+inherits = "lpcb2"
+opt-level = "s"
+
+[profile.bsr_and_lpcb3]
+inherits = "lpcb3"
+opt-level = "s"
+
+[profile.bsr_and_lpcb4]
+inherits = "lpcb1"
+opt-level = "z"
+
+[profile.bsr_and_lpcb5]
+inherits = "lpcb1"
+opt-level = "z"
+
+[profile.bsr_and_lpcb6]
+inherits = "lpcb1"
+opt-level = "z"
+
 [features]
 use-synonyms-search = ["thesaurus/static"]
 default = ["memory-cache"]
 dhat-heap = ["dep:dhat"]
-memory-cache = ["dep:…
+memory-cache = ["dep:moka"]
 redis-cache = ["dep:redis", "dep:base64"]
 compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
 encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]
dev.Dockerfile
CHANGED
@@ -1,5 +1,5 @@
 # Create Builder image
-FROM --platform=$BUILDPLATFORM rust:1.…
+FROM --platform=$BUILDPLATFORM rust:1.78.0-alpine3.18

 # Install required dependencies
 RUN apk add --no-cache alpine-sdk musl-dev g++ make libcrypto3 libressl-dev perl build-base
flake.lock
CHANGED
@@ -34,11 +34,11 @@
     },
     "nixpkgs_2": {
       "locked": {
-        "lastModified": …
-        "narHash": "sha256-…
+        "lastModified": 1725194671,
+        "narHash": "sha256-tLGCFEFTB5TaOKkpfw3iYT9dnk4awTP/q4w+ROpMfuw=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "…
+        "rev": "b833ff01a0d694b910daca6e2ff4a3f26dee478c",
         "type": "github"
       },
       "original": {
flake.nix
CHANGED
@@ -36,7 +36,7 @@
           haskellPackages.hadolint
           nodejs
           nodePackages_latest.cspell
-          …
+          eslint
           nodePackages_latest.markdownlint-cli2
           nodePackages_latest.stylelint
           redis
public/images/close.svg
ADDED
public/static/index.js
CHANGED
@@ -1,34 +1,6 @@
 /**
-…
-/**
- * Redirects the user to the search results page with the query parameter
- */
-function searchWeb() {
-  const query = searchBox.value.trim()
-  try {
-    let safeSearchLevel = document.querySelector('.search_options select').value
-    if (query) {
-      window.location.href = `search?q=${encodeURIComponent(
-        query,
-      )}&safesearch=${encodeURIComponent(safeSearchLevel)}`
-    }
-  } catch (error) {
-    if (query) {
-      window.location.href = `search?q=${encodeURIComponent(query)}`
-    }
-  }
+ * A function that clears the search input text when the clear button is clicked.
+ */
+function clearSearchText() {
+  document.querySelector('.search_bar > input').value = ''
 }
-
-/**
- * Listens for the 'Enter' key press event on the search box and calls the searchWeb function
- * @param {KeyboardEvent} e - The keyboard event object
- */
-searchBox.addEventListener('keyup', (e) => {
-  if (e.key === 'Enter') {
-    searchWeb()
-  }
-})
public/static/pagination.js
DELETED
@@ -1,39 +0,0 @@
-/**
- * Navigates to the next page by incrementing the current page number in the URL query string.
- * @returns {void}
- */
-function navigate_forward() {
-  let url = new URL(window.location);
-  let searchParams = url.searchParams;
-
-  let q = searchParams.get('q');
-  let page = parseInt(searchParams.get('page'));
-
-  if (isNaN(page)) {
-    page = 1;
-  } else {
-    page++;
-  }
-
-  window.location.href = `${url.origin}${url.pathname}?q=${encodeURIComponent(q)}&page=${page}`;
-}
-
-/**
- * Navigates to the previous page by decrementing the current page number in the URL query string.
- * @returns {void}
- */
-function navigate_backward() {
-  let url = new URL(window.location);
-  let searchParams = url.searchParams;
-
-  let q = searchParams.get('q');
-  let page = parseInt(searchParams.get('page'));
-
-  if (isNaN(page)) {
-    page = 0;
-  } else if (page > 0) {
-    page--;
-  }
-
-  window.location.href = `${url.origin}${url.pathname}?q=${encodeURIComponent(q)}&page=${page}`;
-}
public/static/search_area_options.js
DELETED
@@ -1,18 +0,0 @@
-document.addEventListener(
-  'DOMContentLoaded',
-  () => {
-    let url = new URL(window.location)
-    let searchParams = url.searchParams
-
-    let safeSearchLevel = searchParams.get('safesearch')
-
-    if (
-      safeSearchLevel >= 0 &&
-      safeSearchLevel <= 2 &&
-      safeSearchLevel !== null
-    ) {
-      document.querySelector('.search_options select').value = safeSearchLevel
-    }
-  },
-  false,
-)
public/static/themes/simple.css
CHANGED
@@ -5,7 +5,8 @@
   font-weight: 200 600;
   font-stretch: 0% 200%;
   font-display: swap;
-  src: url(…
+  src: url('https://fonts.gstatic.com/s/rubik/v28/iJWKBXyIfDnIV7nErXyi0A.woff2')
+    format('woff2');
 }

 * {
@@ -73,6 +74,11 @@ button {
   font-size: 1.6rem;
 }

+.search_bar input::-webkit-search-results-button,
+.search_bar input::-webkit-search-cancel-button{
+  display: none;
+}
+
 .search_bar input:focus {
   outline: 2px solid var(--foreground-color);
 }
@@ -92,7 +98,7 @@ button {
   outline-offset: 3px;
   outline: 2px solid transparent;
   border: none;
-  transition: .1s;
+  transition: 0.1s;
   gap: 0;
   background-color: var(--color-six);
   color: var(--background-color);
@@ -102,10 +108,10 @@ button {
 }

 .search_bar button img {
-  position:absolute;
-  left:50%;
-  top:50%;
-  transform:translate(-50%, -50%);
+  position: absolute;
+  left: 50%;
+  top: 50%;
+  transform: translate(-50%, -50%);
 }

 .search_bar button:active {
@@ -248,7 +254,6 @@ button {

 /* styles for the footer and header */

-
 header {
   width: 100%;
   background: var(--background-color);
@@ -336,7 +341,7 @@ footer div {
 .results_aggregated {
   display: flex;
   flex-direction: column;
-  justify-content: space-between;
+  justify-content: space-between;
   margin: 2rem 0;
   content-visibility: auto;
 }
@@ -443,7 +448,7 @@ footer div {
   align-items: center;
 }

-.page_navigation …{
+.page_navigation a {
   background: var(--background-color);
   color: var(--foreground-color);
   padding: 1rem;
@@ -452,7 +457,7 @@ footer div {
   border: none;
 }

-.page_navigation …{
+.page_navigation a:active {
   filter: brightness(1.2);
 }

@@ -709,7 +714,8 @@ footer div {
 }

 .settings_container .user_interface select,
-.settings_container .general select …
+.settings_container .general select,
+.settings_container .general form input {
   margin: 0.7rem 0;
   width: 20rem;
   background-color: var(--color-one);
@@ -721,6 +727,38 @@ footer div {
   text-transform: capitalize;
 }

+.settings_container .general form input {
+  padding: 0;
+  width: 30rem;
+  text-align: center;
+  text-transform: none;
+}
+
+.settings_container .general form input::file-selector-button {
+  content: 'Browse';
+  padding: 1rem 2rem;
+  font-size: 1.5rem;
+  background: var(--color-three);
+  color: var(--background-color);
+  border-radius: 0.5rem;
+  border: 2px solid transparent;
+  font-weight: bold;
+  transition: all 0.1s ease-out;
+  cursor: pointer;
+  box-shadow: 5px 5px;
+  outline: none;
+  translate: -1rem 0;
+}
+
+.settings_container .general form input::file-selector-button:active {
+  box-shadow: none;
+  translate: 5px 5px;
+}
+
+.settings_container .general .export_btn {
+  margin-bottom: 1rem;
+}
+
 .settings_container .user_interface option:hover,
 .settings_container .general option:hover {
   background-color: var(--color-one);
@@ -793,7 +831,7 @@ footer div {
   left: 0.4rem;
   bottom: 0.4rem;
   background-color: var(--background-color);
-  transition: .2s;
+  transition: 0.2s;
 }

 input:checked + .slider {
@@ -817,7 +855,7 @@ input:checked + .slider::before {
   border-radius: 50%;
 }

-@media screen and (width <= …
+@media screen and (width <=1136px) {
   .hero-text-container {
     width: unset;
   }
@@ -827,7 +865,7 @@ input:checked + .slider::before {
   }
 }

-@media screen and (width <= …
+@media screen and (width <=706px) {
   .about-container article .logo-container svg {
     width: clamp(200px, 290px, 815px);
   }
@@ -851,7 +889,7 @@ input:checked + .slider::before {
   .features {
     grid-template-columns: 1fr;
   }
-
+
   .feature-list {
     padding: 35px 0;
   }
src/cache/cacher.rs
CHANGED
@@ -2,10 +2,9 @@
 //! from the upstream search engines in a json format.

 use error_stack::Report;
+use futures::future::join_all;
 #[cfg(feature = "memory-cache")]
-use …
-#[cfg(feature = "memory-cache")]
-use mini_moka::sync::ConcurrentCacheExt;
+use moka::future::Cache as MokaCache;

 #[cfg(feature = "memory-cache")]
 use std::time::Duration;
@@ -214,12 +213,10 @@ pub trait Cacher: Send + Sync {
 }

 /// A helper function that compresses or encrypts search results before they're inserted into a cache store
-…
 /// # Arguments
 ///
 /// * `search_results` - A reference to the search_Results to process.
 ///
-…
 ///
 /// # Error
 /// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
@@ -376,13 +373,13 @@ impl Cacher for RedisCache {
     }
 }
 /// TryInto implementation for SearchResults from Vec<u8>
-use std::convert::TryInto;
+use std::{convert::TryInto, sync::Arc};

 impl TryInto<SearchResults> for Vec<u8> {
     type Error = CacheError;

     fn try_into(self) -> Result<SearchResults, Self::Error> {
-        …
+        bincode::deserialize_from(self.as_slice()).map_err(|_| CacheError::SerializationError)
     }
 }

@@ -390,7 +387,7 @@ impl TryInto<Vec<u8>> for &SearchResults {
     type Error = CacheError;

     fn try_into(self) -> Result<Vec<u8>, Self::Error> {
-        …
+        bincode::serialize(self).map_err(|_| CacheError::SerializationError)
     }
 }

@@ -398,7 +395,16 @@ impl TryInto<Vec<u8>> for &SearchResults {
 #[cfg(feature = "memory-cache")]
 pub struct InMemoryCache {
     /// The backend cache which stores data.
-    cache: MokaCache<String, Vec<u8…
+    cache: Arc<MokaCache<String, Vec<u8>>>,
+}
+
+#[cfg(feature = "memory-cache")]
+impl Clone for InMemoryCache {
+    fn clone(&self) -> Self {
+        Self {
+            cache: self.cache.clone(),
+        }
+    }
 }

 #[cfg(feature = "memory-cache")]
@@ -408,15 +414,17 @@ impl Cacher for InMemoryCache {
         log::info!("Initialising in-memory cache");

         InMemoryCache {
-            cache: …
+            cache: Arc::new(
+                MokaCache::builder()
+                    .time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
+                    .build(),
+            ),
         }
     }

     async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
         let hashed_url_string = self.hash_url(url);
-        match self.cache.get(&hashed_url_string) {
+        match self.cache.get(&hashed_url_string).await {
             Some(res) => self.post_process_search_results(res).await,
             None => Err(Report::new(CacheError::MissingValue)),
         }
@@ -427,13 +435,18 @@ impl Cacher for InMemoryCache {
         search_results: &[SearchResults],
         urls: &[String],
     ) -> Result<(), Report<CacheError>> {
+        let mut tasks: Vec<_> = Vec::with_capacity(urls.len());
         for (url, search_result) in urls.iter().zip(search_results.iter()) {
             let hashed_url_string = self.hash_url(url);
             let bytes = self.pre_process_search_results(search_result).await?;
-            self.…
+            let new_self = self.clone();
+            tasks.push(tokio::spawn(async move {
+                new_self.cache.insert(hashed_url_string, bytes).await
+            }));
         }

-        …
+        join_all(tasks).await;
+
         Ok(())
     }
 }
@@ -531,7 +544,7 @@ impl SharedCache {
     /// # Arguments
     ///
     /// * `url` - It takes the search url as an argument which will be used as the key to fetch the
-    ///   …
+    ///   cached results from the cache.
     ///
     /// # Error
     ///
@@ -548,9 +561,9 @@ impl SharedCache {
     /// # Arguments
     ///
     /// * `search_results` - It takes the `SearchResults` as an argument which are results that
-    ///   …
+    ///   needs to be cached.
     /// * `url` - It takes the search url as an argument which will be used as the key for storing
-    ///   …
+    ///   results in the cache.
     ///
     /// # Error
     ///
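The switch above from `mini_moka` to `moka::future` makes every cache read and write awaitable, and `bincode` replaces JSON for the cached byte payloads. A minimal standalone sketch of the same pattern, assuming the `moka` (with its "future" feature), `bincode`, and `tokio` dependencies from the Cargo.toml diff; the key and payload here are illustrative, not websurfx's real types:

use std::{sync::Arc, time::Duration};
use moka::future::Cache;

#[tokio::main]
async fn main() {
    // TTL-bounded async cache, mirroring the `time_to_live(cache_expiry_time)`
    // builder call in the diff above.
    let cache: Arc<Cache<String, Vec<u8>>> = Arc::new(
        Cache::builder()
            .time_to_live(Duration::from_secs(60))
            .build(),
    );

    // Values go in as bincode-encoded bytes, as in the TryInto impls above.
    let bytes = bincode::serialize(&("example result", 42u32)).unwrap();
    cache.insert("hashed-url".to_string(), bytes).await;

    // Reads are async too; a miss yields `None` rather than an error.
    if let Some(raw) = cache.get("hashed-url").await {
        let (title, rank): (String, u32) = bincode::deserialize(&raw).unwrap();
        println!("{title} ({rank})");
    }
}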
src/cache/redis_cacher.rs
CHANGED
@@ -4,7 +4,10 @@
 use super::error::CacheError;
 use error_stack::Report;
 use futures::stream::FuturesUnordered;
-use redis::{…
+use redis::{
+    aio::ConnectionManager, AsyncCommands, Client, ExistenceCheck, RedisError, SetExpiry,
+    SetOptions,
+};

 /// A constant holding the redis pipeline size.
 const REDIS_PIPELINE_SIZE: usize = 3;
@@ -13,7 +16,7 @@ const REDIS_PIPELINE_SIZE: usize = 3;
 /// connect to.
 pub struct RedisCache {
     /// It stores a pool of connections ready to be used.
-    connection_pool: …
+    connection_pool: Box<[ConnectionManager]>,
     /// It stores the size of the connection pool (in other words the number of
     /// connections that should be stored in the pool).
     pool_size: u8,
@@ -55,13 +58,13 @@ impl RedisCache {
         }));
     }

-    let mut outputs = Vec::…
+    let mut outputs = Vec::with_capacity(tasks.len());
     for task in tasks {
         outputs.push(task.await??);
     }

     let redis_cache = RedisCache {
-        connection_pool: outputs,
+        connection_pool: outputs.into_boxed_slice(),
         pool_size,
         current_connection: Default::default(),
         cache_ttl,
@@ -139,8 +142,14 @@ impl RedisCache {
     self.current_connection = Default::default();

     for (key, json_result) in keys.zip(json_results) {
-        self.pipeline…
-        …
+        self.pipeline.set_options(
+            key,
+            json_result,
+            SetOptions::default()
+                .conditional_set(ExistenceCheck::NX)
+                .get(true)
+                .with_expiration(SetExpiry::EX(self.cache_ttl.into())),
+        );
     }

     let mut result: Result<(), RedisError> = self
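The pipeline change above leans on redis-rs `SetOptions`, which compiles down to a single `SET key value NX GET EX <ttl>` command instead of separate SET and EXPIRE calls. A hedged sketch of the same call outside a pipeline (the 600-second TTL and the key/value are placeholders):

use redis::{aio::ConnectionManager, AsyncCommands, ExistenceCheck, SetExpiry, SetOptions};

async fn cache_result(
    con: &mut ConnectionManager,
    key: &str,
    json: &str,
) -> redis::RedisResult<()> {
    // NX: only write if the key is absent; EX: expire after 600 s;
    // get(true): return the previous value (if any) instead of a bare OK.
    let opts = SetOptions::default()
        .conditional_set(ExistenceCheck::NX)
        .get(true)
        .with_expiration(SetExpiry::EX(600));
    let _old: Option<String> = con.set_options(key, json, opts).await?;
    Ok(())
}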
src/config/parser.rs
CHANGED
@@ -6,6 +6,7 @@ use crate::handler::{file_path, FileType};
 use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
+use reqwest::Proxy;
 use std::{collections::HashMap, fs, thread::available_parallelism};

 /// A named struct which stores the parsed config file options.
@@ -37,15 +38,23 @@ pub struct Config {
     pub request_timeout: u8,
     /// It stores the number of threads which controls the app will use to run.
     pub threads: u8,
+    /// Set the keep-alive time for client connections to the HTTP server
+    pub client_connection_keep_alive: u8,
     /// It stores configuration options for the ratelimiting middleware.
     pub rate_limiter: RateLimiter,
     /// It stores the level of safe search to be used for restricting content in the
     /// search results.
     pub safe_search: u8,
     /// It stores the TCP connection keepalive duration in seconds.
-    pub …
+    pub tcp_connection_keep_alive: u8,
     /// It stores the pool idle connection timeout in seconds.
     pub pool_idle_connection_timeout: u8,
+    /// Url of the proxy to use for outgoing requests.
+    pub proxy: Option<Proxy>,
+    /// It stores the number of https connections to keep in the pool.
+    pub number_of_https_connections: u8,
+    /// It stores the operating system's TLS certificates for https requests.
+    pub operating_system_tls_certificates: bool,
 }

 impl Config {
@@ -55,7 +64,7 @@ impl Config {
     /// # Arguments
     ///
     /// * `logging_initialized` - It takes a boolean which ensures that the logging doesn't get
-    ///   …
+    ///   initialized twice. Pass false if the logger has not yet been initialized.
     ///
     /// # Error
     ///
@@ -69,11 +78,11 @@ impl Config {
         lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
             .exec()?;

-        let parsed_threads: u8 = globals.get…
+        let parsed_threads: u8 = globals.get("threads")?;

-        let debug: bool = globals.get…
-        let logging: bool = globals.get…
-        let adaptive_window: bool = globals.get…
+        let debug: bool = globals.get("debug")?;
+        let logging: bool = globals.get("logging")?;
+        let adaptive_window: bool = globals.get("adaptive_window")?;

         if !logging_initialized {
             set_logging_level(debug, logging);
@@ -90,9 +99,9 @@ impl Config {
             parsed_threads
         };

-        let rate_limiter…
+        let rate_limiter: HashMap<String, u8> = globals.get("rate_limiter")?;

-        let parsed_safe_search: u8 = globals.get::<_…
+        let parsed_safe_search: u8 = globals.get::<_>("safe_search")?;
         let safe_search: u8 = match parsed_safe_search {
             0..=4 => parsed_safe_search,
             _ => {
@@ -103,7 +112,7 @@ impl Config {
         };

         #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
-        let parsed_cet = globals.get::<_…
+        let parsed_cet = globals.get::<_>("cache_expiry_time")?;
         #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
         let cache_expiry_time = match parsed_cet {
             0..=59 => {
@@ -116,28 +125,39 @@ impl Config {
             _ => parsed_cet,
         };

+        let proxy_opt: Option<String> = globals.get::<_>("proxy")?;
+        let proxy = proxy_opt.and_then(|proxy_str| {
+            Proxy::all(proxy_str).ok().and_then(|_| {
+                log::error!("Invalid proxy url, defaulting to no proxy.");
+                None
+            })
+        });
+
         Ok(Config {
-            …
-            …
+            operating_system_tls_certificates: globals
+                .get::<_>("operating_system_tls_certificates")?,
+            port: globals.get::<_>("port")?,
+            binding_ip: globals.get::<_>("binding_ip")?,
             style: Style::new(
-                globals.get::<_…
-                globals.get::<_…
-                globals.get::<_…
+                globals.get::<_>("theme")?,
+                globals.get::<_>("colorscheme")?,
+                globals.get::<_>("animation")?,
             ),
             #[cfg(feature = "redis-cache")]
-            redis_url: globals.get::<_…
+            redis_url: globals.get::<_>("redis_url")?,
             aggregator: AggregatorConfig {
-                random_delay: globals.get::<_…
+                random_delay: globals.get::<_>("production_use")?,
             },
             logging,
             debug,
             adaptive_window,
-            upstream_search_engines: globals…
-            …
-            …
-            …
+            upstream_search_engines: globals.get::<_>("upstream_search_engines")?,
+            request_timeout: globals.get::<_>("request_timeout")?,
+            tcp_connection_keep_alive: globals.get::<_>("tcp_connection_keep_alive")?,
+            pool_idle_connection_timeout: globals.get::<_>("pool_idle_connection_timeout")?,
+            number_of_https_connections: globals.get::<_>("number_of_https_connections")?,
             threads,
+            client_connection_keep_alive: globals.get::<_>("client_connection_keep_alive")?,
             rate_limiter: RateLimiter {
                 number_of_requests: rate_limiter["number_of_requests"],
                 time_limit: rate_limiter["time_limit"],
@@ -145,6 +165,7 @@ impl Config {
             safe_search,
             #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
             cache_expiry_time,
+            proxy,
         })
     }
 }
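One detail worth flagging in the committed proxy handling: `Proxy::all(proxy_str).ok().and_then(|_| { log::error!(...); None })` runs the error branch on the success path too, so `config.proxy` ends up `None` even for a valid proxy URL. A sketch of what the intended logic presumably is (keep a successfully parsed proxy, log and fall back only on failure):

use reqwest::Proxy;

/// Parse an optional proxy URL from the config, keeping valid proxies
/// and logging + discarding invalid ones. Hypothetical helper, not part
/// of the diff above.
fn parse_proxy(proxy_opt: Option<String>) -> Option<Proxy> {
    proxy_opt.and_then(|proxy_str| match Proxy::all(&proxy_str) {
        Ok(proxy) => Some(proxy),
        Err(_) => {
            log::error!("Invalid proxy url, defaulting to no proxy.");
            None
        }
    })
}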
src/engines/bing.rs
CHANGED
@@ -15,6 +15,7 @@ use crate::models::engine_models::{EngineError, SearchEngine};

 use error_stack::{Report, Result, ResultExt};

+use super::common::build_cookie;
 use super::search_result_parser::SearchResultParser;

 /// A new Bing engine type defined in-order to implement the `SearchEngine` trait which allows to
@@ -73,19 +74,16 @@ impl SearchEngine for Bing {
         ("_UR=QS=0&TQS", "0"),
     ];

-    let …
-    for (k, v) in &query_params {
-        cookie_string.push_str(&format!("{k}={v}; "));
-    }
+    let cookie_string = build_cookie(&query_params);

     let header_map = HeaderMap::try_from(&HashMap::from([
-        ("…
-        ("…
+        ("User-Agent".to_string(), user_agent.to_string()),
+        ("Referer".to_string(), "https://google.com/".to_string()),
         (
-            "…
+            "Content-Type".to_string(),
             "application/x-www-form-urlencoded".to_string(),
         ),
-        ("…
+        ("Cookie".to_string(), cookie_string),
     ]))
     .change_context(EngineError::UnexpectedError)?;
src/engines/brave.rs
CHANGED
@@ -54,14 +54,14 @@ impl SearchEngine for Brave {
     };

     let header_map = HeaderMap::try_from(&HashMap::from([
-        ("…
+        ("User-Agent".to_string(), user_agent.to_string()),
         (
-            "…
+            "Content-Type".to_string(),
             "application/x-www-form-urlencoded".to_string(),
         ),
-        ("…
+        ("Referer".to_string(), "https://google.com/".to_string()),
         (
-            "…
+            "Cookie".to_string(),
             format!("safe_search={safe_search_level}"),
         ),
     ]))
src/engines/common.rs
ADDED
@@ -0,0 +1,23 @@
+//! This module provides common functionalities for engines
+
+/**
+ * Build a query from a list of key value pairs.
+ */
+pub fn build_query(query_params: &[(&str, &str)]) -> String {
+    let mut query_params_string = String::new();
+    for (k, v) in query_params {
+        query_params_string.push_str(&format!("&{k}={v}"));
+    }
+    query_params_string
+}
+
+/**
+ * Build a cookie from a list of key value pairs.
+ */
+pub fn build_cookie(cookie_params: &[(&str, &str)]) -> String {
+    let mut cookie_string = String::new();
+    for (k, v) in cookie_params {
+        cookie_string.push_str(&format!("{k}={v}; "));
+    }
+    cookie_string
+}
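For illustration, how the two helpers behave on a small input, assuming they are in scope; note that `build_query` prefixes every pair (including the first) with `&`, and `build_cookie` leaves a trailing `"; "`:

fn main() {
    let params = [("q", "rust"), ("page", "2")];
    assert_eq!(build_query(&params), "&q=rust&page=2");
    assert_eq!(build_cookie(&params), "q=rust; page=2; ");
}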
src/engines/duckduckgo.rs
CHANGED
@@ -65,13 +65,13 @@ impl SearchEngine for DuckDuckGo {

     // initializing HeaderMap and adding appropriate headers.
     let header_map = HeaderMap::try_from(&HashMap::from([
-        ("…
-        ("…
+        ("User-Agent".to_string(), user_agent.to_string()),
+        ("Referer".to_string(), "https://google.com/".to_string()),
         (
-            "…
+            "Content-Type".to_string(),
             "application/x-www-form-urlencoded".to_string(),
         ),
-        ("…
+        ("Cookie".to_string(), "kl=wt-wt".to_string()),
     ]))
     .change_context(EngineError::UnexpectedError)?;
src/engines/librex.rs
CHANGED
@@ -30,7 +30,7 @@ impl LibreX {
     Ok(Self {
         parser: SearchResultParser::new(
             ".text-result-container>p",
-            ".text-result-…
+            ".text-result-wrapper",
             ".text-result-wrapper>a>h2",
             ".text-result-wrapper>a",
             ".text-result-wrapper>span",
@@ -72,11 +72,11 @@ impl SearchEngine for LibreX {

     // initializing HeaderMap and adding appropriate headers.
     let header_map = HeaderMap::try_from(&HashMap::from([
-        ("…
-        ("…
-        ("…
+        ("User-Agent".to_string(), user_agent.to_string()),
+        ("Referer".to_string(), "https://google.com/".to_string()),
+        ("Content-Type".to_string(), "application/x-www-form-urlencoded".to_string()),
         (
-            "…
+            "Cookie".to_string(),
             "theme=amoled; disable_special=on; disable_frontends=on; language=en; number_of_results=10; safe_search=on; save=1".to_string(),
         ),
     ]))
src/engines/mod.rs
CHANGED
@@ -5,9 +5,11 @@

 pub mod bing;
 pub mod brave;
+pub mod common;
 pub mod duckduckgo;
 pub mod librex;
 pub mod mojeek;
 pub mod search_result_parser;
 pub mod searx;
 pub mod startpage;
+pub mod wikipedia;
src/engines/mojeek.rs
CHANGED
@@ -14,6 +14,7 @@ use crate::models::engine_models::{EngineError, SearchEngine};

 use error_stack::{Report, Result, ResultExt};

+use super::common::{build_cookie, build_query};
 use super::search_result_parser::SearchResultParser;

 /// A new Mojeek engine type defined in-order to implement the `SearchEngine` trait which allows to
@@ -30,8 +31,8 @@ impl Mojeek {
         parser: SearchResultParser::new(
             ".result-col",
             ".results-standard li",
-            "a…
-            "…
+            "h2 > a.title",
+            "a.ob",
             "p.s",
         )?,
     })
@@ -107,10 +108,7 @@ impl SearchEngine for Mojeek {
         ("safe", &safe),
     ];

-    let …
-    for (k, v) in &query_params {
-        query_params_string.push_str(&format!("&{k}={v}"));
-    }
+    let query_params_string = build_query(&query_params);

     let url: String = match page {
         0 => {
@@ -123,19 +121,16 @@ impl SearchEngine for Mojeek {
         }
     };

-    let …
-    for (k, v) in &query_params {
-        cookie_string.push_str(&format!("{k}={v}; "));
-    }
+    let cookie_string = build_cookie(&query_params);

     let header_map = HeaderMap::try_from(&HashMap::from([
-        ("…
-        ("…
+        ("User-Agent".to_string(), user_agent.to_string()),
+        ("Referer".to_string(), "https://google.com/".to_string()),
         (
-            "…
+            "Content-Type".to_string(),
             "application/x-www-form-urlencoded".to_string(),
         ),
-        ("…
+        ("Cookie".to_string(), cookie_string),
     ]))
     .change_context(EngineError::UnexpectedError)?;

@@ -157,7 +152,7 @@ impl SearchEngine for Mojeek {
     .parse_for_results(&document, |title, url, desc| {
         Some(SearchResult::new(
             title.inner_html().trim(),
-            url.…
+            url.attr("href")?.trim(),
             desc.inner_html().trim(),
             &["mojeek"],
         ))
src/engines/searx.rs
CHANGED
@@ -66,10 +66,10 @@ impl SearchEngine for Searx {

     // initializing headers and adding appropriate headers.
     let header_map = HeaderMap::try_from(&HashMap::from([
-        ("…
-        ("…
-        ("…
-        ("…
+        ("User-Agent".to_string(), user_agent.to_string()),
+        ("Referer".to_string(), "https://google.com/".to_string()),
+        ("Content-Type".to_string(), "application/x-www-form-urlencoded".to_string()),
+        ("Cookie".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
     ]))
     .change_context(EngineError::UnexpectedError)?;
src/engines/startpage.rs
CHANGED
@@ -57,13 +57,13 @@ impl SearchEngine for Startpage {

     // initializing HeaderMap and adding appropriate headers.
     let header_map = HeaderMap::try_from(&HashMap::from([
-        ("…
-        ("…
+        ("User-Agent".to_string(), user_agent.to_string()),
+        ("Referer".to_string(), "https://google.com/".to_string()),
         (
-            "…
+            "Content-Type".to_string(),
             "application/x-www-form-urlencoded".to_string(),
         ),
-        ("…
+        ("Cookie".to_string(), "preferences=connect_to_serverEEE0N1Ndate_timeEEEworldN1Ndisable_family_filterEEE0N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE1N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fnight%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE10N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius".to_string()),
     ]))
     .change_context(EngineError::UnexpectedError)?;
src/engines/wikipedia.rs
ADDED
@@ -0,0 +1,101 @@
+//! The `wikipedia` module handles the scraping of results from wikipedia
+//! with user provided query and with a page number if provided.
+
+use std::collections::HashMap;
+
+use reqwest::header::HeaderMap;
+use reqwest::Client;
+use scraper::Html;
+
+use crate::models::aggregation_models::SearchResult;
+
+use crate::models::engine_models::{EngineError, SearchEngine};
+
+use error_stack::{Report, Result, ResultExt};
+
+use super::common::build_query;
+use super::search_result_parser::SearchResultParser;
+
+/// A new Wikipedia engine type defined in-order to implement the `SearchEngine` trait which allows to
+/// reduce code duplication as well as allows to create vector of different search engines easily.
+pub struct Wikipedia {
+    /// The parser, used to interpret the search result.
+    parser: SearchResultParser,
+    /// The id of the engine, equals to 'wikipedia-' + language
+    id: String,
+    /// The host where wikipedia can be accessed.
+    host: String,
+}
+
+impl Wikipedia {
+    /// Creates the Wikipedia parser.
+    pub fn new(language: &str) -> Result<Self, EngineError> {
+        let host = format!("https://{}.wikipedia.org", &language);
+        let id = format!("wikipedia-{}", &language);
+        Ok(Self {
+            parser: SearchResultParser::new(
+                "p.mw-search-nonefound",
+                ".mw-search-results li.mw-search-result",
+                ".mw-search-result-heading a",
+                ".mw-search-result-heading a",
+                ".searchresult",
+            )?,
+            id,
+            host,
+        })
+    }
+}
+
+#[async_trait::async_trait]
+impl SearchEngine for Wikipedia {
+    async fn results(
+        &self,
+        query: &str,
+        page: u32,
+        user_agent: &str,
+        client: &Client,
+        _safe_search: u8,
+    ) -> Result<Vec<(String, SearchResult)>, EngineError> {
+        let header_map = HeaderMap::try_from(&HashMap::from([
+            ("User-Agent".to_string(), user_agent.to_string()),
+            ("Referer".to_string(), self.host.to_string()),
+        ]))
+        .change_context(EngineError::UnexpectedError)?;
+
+        let offset = (page * 20).to_string();
+        let query_params: Vec<(&str, &str)> = vec![
+            ("limit", "20"),
+            ("offset", &offset),
+            ("profile", "default"),
+            ("search", query),
+            ("title", "Special:Search"),
+            ("ns0", "1"),
+        ];
+
+        let query_params_string = build_query(&query_params);
+
+        let url: String = format!("{}/w/index.php?{}", self.host, query_params_string);
+
+        let document: Html = Html::parse_document(
+            &Wikipedia::fetch_html_from_upstream(self, &url, header_map, client).await?,
+        );
+
+        if self.parser.parse_for_no_results(&document).next().is_some() {
+            return Err(Report::new(EngineError::EmptyResultSet));
+        }
+
+        // scrape all the results from the html
+        self.parser
+            .parse_for_results(&document, |title, url, desc| {
+                let found_url = url.attr("href");
+                found_url.map(|relative_url| {
+                    SearchResult::new(
+                        title.inner_html().trim(),
+                        &format!("{}{relative_url}", self.host),
+                        desc.inner_html().trim(),
+                        &[&self.id],
+                    )
+                })
+            })
+    }
+}
src/lib.rs
CHANGED
@@ -14,7 +14,7 @@ pub mod results;
 pub mod server;
 pub mod templates;

-use std::{net::TcpListener, sync::OnceLock};
+use std::{net::TcpListener, sync::OnceLock, time::Duration};

 use crate::server::router;

@@ -91,7 +91,7 @@ pub fn run(
             .wrap(cors)
             .wrap(Governor::new(
                 &GovernorConfigBuilder::default()
-                    .…
+                    .seconds_per_request(config.rate_limiter.time_limit as u64)
                     .burst_size(config.rate_limiter.number_of_requests as u32)
                     .finish()
                     .unwrap(),
@@ -110,9 +110,14 @@ pub fn run(
             .service(server::routes::search::search) // search page
             .service(router::about) // about page
             .service(router::settings) // settings page
+            .service(server::routes::export_import::download) // download page
             .default_service(web::route().to(router::not_found)) // error page
     })
     .workers(config.threads as usize)
+    // Set the keep-alive timer for client connections
+    .keep_alive(Duration::from_secs(
+        config.client_connection_keep_alive as u64,
+    ))
     // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
     .listen(listener)?
     .run();
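
The governor change switches the limiter to an explicit refill interval: `burst_size` requests may arrive at once, and one new permit is issued every `seconds_per_request` seconds thereafter. A standalone sketch with illustrative values (the `3`/`50` pair mirrors the shape, not necessarily the shipped defaults):

```rust
use actix_governor::GovernorConfigBuilder;

fn main() {
    // Allows bursts of up to 50 requests per client, refilled at one
    // permit every 3 seconds; `finish()` returns None only on invalid input.
    let conf = GovernorConfigBuilder::default()
        .seconds_per_request(3)
        .burst_size(50)
        .finish()
        .unwrap();
    let _ = conf; // would be passed to Governor::new(&conf) as in the diff
}
```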
src/models/aggregation_models.rs
CHANGED
@@ -3,7 +3,6 @@

 use super::engine_models::EngineError;
 use serde::{Deserialize, Serialize};
-use smallvec::SmallVec;
 #[cfg(any(
     feature = "use-synonyms-search",
     feature = "use-non-static-synonyms-search"
@@ -12,7 +11,9 @@ use thesaurus::synonyms;
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
 /// to write idiomatic rust using `Iterators`.
-///
+///
+/// (href url in html in simple words).
+///
 #[derive(Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
@@ -23,7 +24,7 @@ pub struct SearchResult {
     /// The description of the search result.
     pub description: String,
     /// The names of the upstream engines from which this results were provided.
-    pub engine: …
+    pub engine: Vec<String>,
     /// The td-tdf score of the result in regards to the title, url and description and the user's query
     pub relevance_score: f32,
 }
@@ -35,7 +36,7 @@ impl SearchResult {
     ///
     /// * `title` - The title of the search result.
     /// * `url` - The url which is accessed when clicked on it
-    ///
+    /// (href url in html in simple words).
     /// * `description` - The description of the search result.
     /// * `engine` - The names of the upstream engines from which this results were provided.
     pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
@@ -125,7 +126,7 @@ impl EngineErrorInfo {
     /// # Arguments
     ///
     /// * `error` - It takes the error type which occured while fetching the result from a particular
-    ///
+    /// search engine.
     /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
     pub fn new(error: &EngineError, engine: &str) -> Self {
         Self {
@@ -153,10 +154,10 @@ impl EngineErrorInfo {
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
     /// Stores the individual serializable `SearchResult` struct into a vector of
-    pub results: …
+    pub results: Box<[SearchResult]>,
     /// Stores the information on which engines failed with their engine name
     /// and the type of error that caused it.
-    pub engine_errors_info: …
+    pub engine_errors_info: Box<[EngineErrorInfo]>,
     /// Stores the flag option which holds the check value that the following
     /// search query was disallowed when the safe search level set to 4 and it
     /// was present in the `Blocklist` file.
@@ -178,15 +179,15 @@ impl SearchResults {
     /// # Arguments
     ///
     /// * `results` - Takes an argument of individual serializable `SearchResult` struct
-    ///
+    /// and stores it into a vector of `SearchResult` structs.
     /// * `page_query` - Takes an argument of current page`s search query `q` provided in
-    ///
+    /// the search url.
     /// * `engine_errors_info` - Takes an array of structs which contains information regarding
-    ///
-    pub fn new(results: …
+    /// which engines failed with their names, reason and their severity color name.
+    pub fn new(results: Box<[SearchResult]>, engine_errors_info: Box<[EngineErrorInfo]>) -> Self {
         Self {
             results,
-            engine_errors_info
+            engine_errors_info,
             disallowed: Default::default(),
             filtered: Default::default(),
             safe_search_level: Default::default(),
@@ -205,11 +206,11 @@ impl SearchResults {
     }

     /// A getter function that gets the value of `engine_errors_info`.
-    pub fn engine_errors_info(&mut self) -> …
+    pub fn engine_errors_info(&mut self) -> Box<[EngineErrorInfo]> {
         std::mem::take(&mut self.engine_errors_info)
     }
     /// A getter function that gets the value of `results`.
-    pub fn results(&mut self) -> …
+    pub fn results(&mut self) -> Box<[SearchResult]> {
         self.results.clone()
     }
@@ -254,27 +255,50 @@ fn calculate_tf_idf(
     let tf_idf = TfIdf::new(params);
     let tokener = Tokenizer::new(query, stop_words, Some(punctuation));
     let query_tokens = tokener.split_into_words();
-    let mut search_tokens = vec![];

-    …
-    …
-    …
-    {
-        // find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
-        let synonyms = synonyms(&token);
-        search_tokens.extend(synonyms)
-    }
-    search_tokens.push(token);
-    }

-    let …
-    …
-    …
+    #[cfg(any(
+        feature = "use-synonyms-search",
+        feature = "use-non-static-synonyms-search"
+    ))]
+    let mut extra_tokens = vec![];
+
+    let total_score: f32 = query_tokens
+        .iter()
+        .map(|token| {
+            #[cfg(any(
+                feature = "use-synonyms-search",
+                feature = "use-non-static-synonyms-search"
+            ))]
+            {
+                // find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
+                extra_tokens.extend(synonyms(token))
+            }
+
+            tf_idf.get_score(token)
+        })
+        .sum();
+
+    #[cfg(not(any(
+        feature = "use-synonyms-search",
+        feature = "use-non-static-synonyms-search"
+    )))]
+    let result = total_score / (query_tokens.len() as f32);
+
+    #[cfg(any(
+        feature = "use-synonyms-search",
+        feature = "use-non-static-synonyms-search"
+    ))]
+    let extra_total_score: f32 = extra_tokens
+        .iter()
+        .map(|token| tf_idf.get_score(token))
+        .sum();
+
+    #[cfg(any(
+        feature = "use-synonyms-search",
+        feature = "use-non-static-synonyms-search"
+    ))]
+    let result =
+        (extra_total_score + total_score) / ((query_tokens.len() + extra_tokens.len()) as f32);

     f32::from(!result.is_nan()) * result
 }
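
The reworked `calculate_tf_idf` now folds synonym tokens into the same average instead of pushing everything into one shared token list: the score is the sum of per-token tf-idf scores over query tokens (plus synonym tokens, when either synonyms feature is enabled) divided by the total token count. A reduced, standalone sketch of that scoring shape (the synonym lookup and `cfg` gates are elided, and an explicit NaN check stands in for the diff's `f32::from(!result.is_nan()) * result` guard):

```rust
// Average per-token tf-idf scores over query tokens plus any synonym tokens.
fn relevance(query_scores: &[f32], synonym_scores: &[f32]) -> f32 {
    let total: f32 = query_scores.iter().chain(synonym_scores).sum();
    let count = (query_scores.len() + synonym_scores.len()) as f32;
    let result = total / count;
    // 0/0 yields NaN for an empty token list; map that case to zero.
    if result.is_nan() {
        0.0
    } else {
        result
    }
}

fn main() {
    assert_eq!(relevance(&[0.2, 0.4], &[]), 0.3f32.min(0.3));
    assert_eq!(relevance(&[], &[]), 0.0);
}
```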
src/models/engine_models.rs
CHANGED
@@ -206,6 +206,10 @@ impl EngineHandler {
                 let engine = crate::engines::bing::Bing::new()?;
                 ("bing", Box::new(engine))
             }
+            "wikipedia" => {
+                let engine = crate::engines::wikipedia::Wikipedia::new("en")?;
+                ("wikipedia", Box::new(engine))
+            }
             _ => {
                 return Err(Report::from(EngineError::NoSuchEngineFound(
                     engine_name.to_string(),
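
For reference, a self-contained sketch of the name-to-boxed-engine dispatch the match arm above extends. The trait and error type here are simplified stand-ins, not websurfx's real `SearchEngine` and `EngineError`:

```rust
trait SearchEngine {}

struct Wikipedia {
    language: String,
}

impl Wikipedia {
    fn new(language: &str) -> Self {
        Self { language: language.to_owned() }
    }
}

impl SearchEngine for Wikipedia {}

// Mirrors the new arm: the plain "wikipedia" name yields a fixed English
// instance; unknown names become an error, as with NoSuchEngineFound.
fn engine_from_name(name: &str) -> Result<(&'static str, Box<dyn SearchEngine>), String> {
    match name {
        "wikipedia" => Ok(("wikipedia", Box::new(Wikipedia::new("en")))),
        _ => Err(format!("no such engine: {name}")),
    }
}

fn main() {
    assert!(engine_from_name("wikipedia").is_ok());
    assert!(engine_from_name("altavista").is_err());
}
```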
src/models/parser_models.rs
CHANGED
@@ -10,7 +10,7 @@
 /// order to allow the deserializing the json back to struct in aggregate function in
 /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
 /// it to the template files.
-#[derive(Default)]
+#[derive(Default, Clone)]
 pub struct Style {
     /// It stores the parsed theme option used to set a theme for the website.
     pub theme: String,
@@ -29,7 +29,7 @@ impl Style {
     ///
     /// * `theme` - It takes the parsed theme option used to set a theme for the website.
     /// * `colorscheme` - It takes the parsed colorscheme option used to set a colorscheme
-    ///
+    /// for the theme being used.
     pub fn new(theme: String, colorscheme: String, animation: Option<String>) -> Self {
         Style {
             theme,
src/models/server_models.rs
CHANGED
@@ -2,7 +2,7 @@
 //! engine website.
 use std::borrow::Cow;

-use serde::Deserialize;
+use serde::{Deserialize, Serialize};

 use super::parser_models::Style;

@@ -11,7 +11,7 @@ use super::parser_models::Style;
 pub struct SearchParams {
     /// It stores the search parameter option `q` (or query in simple words)
     /// of the search url.
-    pub q: Option<…
+    pub q: Option<Cow<'static, str>>,
     /// It stores the search parameter `page` (or pageno in simple words)
     /// of the search url.
     pub page: Option<u32>,
@@ -22,16 +22,22 @@ pub struct SearchParams {

 /// A named struct which is used to deserialize the cookies fetched from the client side.
 #[allow(dead_code)]
-#[derive(Deserialize)]
+#[derive(Deserialize, Serialize)]
 pub struct Cookie<'a> {
+    #[serde(borrow)]
     /// It stores the theme name used in the website.
     pub theme: Cow<'a, str>,
+    #[serde(borrow)]
     /// It stores the colorscheme name used for the website theme.
     pub colorscheme: Cow<'a, str>,
+    #[serde(borrow)]
     /// It stores the user selected upstream search engines selected from the UI.
-    pub engines: Cow<'a, …
+    pub engines: Cow<'a, [Cow<'a, str>]>,
     /// It stores the user selected safe search level from the UI.
     pub safe_search_level: u8,
+    #[serde(borrow)]
+    /// It stores the animation name used for the website theme.
+    pub animation: Option<Cow<'a, str>>,
 }

 impl<'a> Cookie<'a> {
@@ -43,6 +49,10 @@ impl<'a> Cookie<'a> {
             colorscheme: Cow::Borrowed(&style.colorscheme),
             engines: Cow::Owned(engines),
             safe_search_level,
+            animation: style
+                .animation
+                .as_ref()
+                .map(|str| Cow::Borrowed(str.as_str())),
         }
     }
 }
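
With `Serialize` derived, the same `Cookie` struct now round-trips through both the `appCookie` value and the new JSON export route; since no `serde(rename_all)` is applied to it, the keys stay snake_case. An illustrative payload with placeholder values (the theme, colorscheme, engine, and animation names below are examples, not values taken from this diff):

```rust
fn main() {
    // Mirrors the field layout of Cookie<'_> after this change.
    let cookie_json = serde_json::json!({
        "theme": "simple",
        "colorscheme": "catppuccin-mocha",
        "engines": ["duckduckgo", "brave"],
        "safe_search_level": 0,
        "animation": "simple-frosted-glow"
    });
    println!("{cookie_json}");
}
```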
src/results/aggregator.rs
CHANGED
@@ -14,7 +14,6 @@ use futures::stream::FuturesUnordered;
 use regex::Regex;
 use reqwest::{Client, ClientBuilder};
 use std::sync::Arc;
-use std::time::{SystemTime, UNIX_EPOCH};
 use tokio::{
     fs::File,
     io::{AsyncBufReadExt, BufReader},
@@ -61,7 +60,7 @@ type FutureVec =
 /// * `debug` - Accepts a boolean value to enable or disable debug mode option.
 /// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the
 /// * `request_timeout` - Accepts a time (secs) as a value which controls the server request timeout.
-///
+/// user through the UI or the config file.
 ///
 /// # Error
 ///
@@ -76,30 +75,30 @@ pub async fn aggregate(
     safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let client = CLIENT.get_or_init(|| {
-        ClientBuilder::new()
+        let mut cb = ClientBuilder::new()
             .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
             .pool_idle_timeout(Duration::from_secs(
                 config.pool_idle_connection_timeout as u64,
             ))
-            .tcp_keepalive(Duration::from_secs(config.…
+            .tcp_keepalive(Duration::from_secs(config.tcp_connection_keep_alive as u64))
+            .pool_max_idle_per_host(config.number_of_https_connections as usize)
             .connect_timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
+            .use_rustls_tls()
+            .tls_built_in_root_certs(config.operating_system_tls_certificates)
             .https_only(true)
             .gzip(true)
             .brotli(true)
-            .http2_adaptive_window(config.adaptive_window)
-            …
-            …
+            .http2_adaptive_window(config.adaptive_window);
+
+        if config.proxy.is_some() {
+            cb = cb.proxy(config.proxy.clone().unwrap());
+        }
+
+        cb.build().unwrap()
     });

     let user_agent: &str = random_user_agent();

-    // Add a random delay before making the request.
-    if config.aggregator.random_delay || !config.debug {
-        let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
-        let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
-        tokio::time::sleep(Duration::from_secs(delay)).await;
-    }
-
     let mut names: Vec<&str> = Vec::with_capacity(0);

     // create tasks for upstream result fetching
@@ -188,19 +187,21 @@ pub async fn aggregate(
         drop(blacklist_map);
     }

-    let mut results: …
-        .…
-        .map(|(_, value)| {
-            …
-            copy.calculate_relevance(query.as_str())
+    let mut results: Box<[SearchResult]> = result_map
+        .into_iter()
+        .map(|(_, mut value)| {
+            if !value.url.contains("temu.com") {
+                value.calculate_relevance(query.as_str())
             }
-            …
+            value
         })
         .collect();
     sort_search_results(&mut results);

-    Ok(SearchResults::new(…
+    Ok(SearchResults::new(
+        results,
+        engine_errors_info.into_boxed_slice(),
+    ))
 }

@@ -247,6 +248,7 @@ pub async fn filter_with_lists(

     Ok(())
 }
+
 /// Sorts SearchResults by relevance score.
 /// <br> sort_unstable is used as its faster,stability is not an issue on our side.
 /// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
@@ -262,10 +264,10 @@ fn sort_search_results(results: &mut [SearchResult]) {
         .unwrap_or(Ordering::Less)
     })
 }
+
 #[cfg(test)]
 mod tests {
     use super::*;
-    use smallvec::smallvec;
     use std::io::Write;
     use tempfile::NamedTempFile;

@@ -281,7 +283,7 @@ mod tests {
             description: "This domain is for use in illustrative examples in documents."
                 .to_owned(),
             relevance_score: 0.0,
-            engine: …
+            engine: vec!["Google".to_owned(), "Bing".to_owned()],
         },
     ));
     map_to_be_filtered.push((
@@ -290,7 +292,7 @@ mod tests {
             title: "Rust Programming Language".to_owned(),
             url: "https://www.rust-lang.org/".to_owned(),
             description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
-            engine: …
+            engine: vec!["Google".to_owned(), "DuckDuckGo".to_owned()],
             relevance_score:0.0
         },)
     );
@@ -331,7 +333,7 @@ mod tests {
             url: "https://www.example.com".to_owned(),
             description: "This domain is for use in illustrative examples in documents."
                 .to_owned(),
-            engine: …
+            engine: vec!["Google".to_owned(), "Bing".to_owned()],
             relevance_score: 0.0,
         },
     ));
@@ -341,7 +343,7 @@ mod tests {
             title: "Rust Programming Language".to_owned(),
             url: "https://www.rust-lang.org/".to_owned(),
             description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
-            engine: …
+            engine: vec!["Google".to_owned(), "DuckDuckGo".to_owned()],
             relevance_score:0.0
         },
     ));
@@ -398,7 +400,7 @@ mod tests {
             url: "https://www.example.com".to_owned(),
             description: "This domain is for use in illustrative examples in documents."
                 .to_owned(),
-            engine: …
+            engine: vec!["Google".to_owned(), "Bing".to_owned()],
             relevance_score: 0.0,
         },
     ));
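
The client construction above moves from a single builder expression to a mutable `ClientBuilder` so the proxy can be attached conditionally; the `OnceLock` means the first initialization wins and every later `aggregate` call reuses the same client. A reduced standalone sketch of that shape (the timeout value and any proxy URL are placeholders):

```rust
use std::sync::OnceLock;
use std::time::Duration;

use reqwest::{Client, ClientBuilder, Proxy};

static CLIENT: OnceLock<Client> = OnceLock::new();

fn shared_client(proxy: Option<Proxy>) -> &'static Client {
    // Only the first caller's settings are used; subsequent calls get the
    // cached client, matching the one-time config read in the aggregator.
    CLIENT.get_or_init(|| {
        let mut cb = ClientBuilder::new()
            .timeout(Duration::from_secs(30))
            .https_only(true);
        if let Some(proxy) = proxy {
            cb = cb.proxy(proxy);
        }
        cb.build().unwrap()
    })
}

fn main() {
    // e.g. Some(Proxy::all("http://127.0.0.1:8080").unwrap()) to route
    // every outgoing request through a local proxy.
    let client = shared_client(None);
    let _ = client;
}
```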
src/server/routes/export_import.rs
ADDED
@@ -0,0 +1,194 @@
+//! This module handles the settings and download route of the search engine website.
+
+use crate::{
+    handler::{file_path, FileType},
+    models::{self, server_models},
+    Config,
+};
+use actix_multipart::form::{tempfile::TempFile, MultipartForm};
+use actix_web::{
+    cookie::{
+        time::{Duration, OffsetDateTime},
+        Cookie,
+    },
+    get, post, web, HttpRequest, HttpResponse,
+};
+use std::borrow::Cow;
+use std::io::Read;
+
+use tokio::fs::read_dir;
+
+/// A helper function that helps in building the list of all available colorscheme/theme/animation
+/// names present in the colorschemes, animations and themes folder respectively by excluding the
+/// ones that have already been selected via the config file.
+///
+/// # Arguments
+///
+/// * `style_type` - It takes the style type of the values `theme` and `colorscheme` as an
+/// argument.
+///
+/// # Error
+///
+/// Returns a list of colorscheme/theme names as a vector of tuple strings on success otherwise
+/// returns a standard error message.
+async fn style_option_list<'a>(
+    style_type: &'a str,
+) -> Result<Box<[Cow<'a, str>]>, Box<dyn std::error::Error>> {
+    let mut style_options = Vec::new();
+    let mut dir = read_dir(format!(
+        "{}static/{}/",
+        file_path(FileType::Theme)?,
+        style_type,
+    ))
+    .await?;
+    while let Some(file) = dir.next_entry().await? {
+        let style_name = file.file_name().to_str().unwrap().replace(".css", "");
+        style_options.push(Cow::Owned(style_name));
+    }
+
+    if style_type == "animations" {
+        style_options.push(Cow::default())
+    }
+
+    Ok(style_options.into_boxed_slice())
+}
+
+/// A helper function which santizes user provided json data from the input file.
+///
+/// # Arguments
+///
+/// * `config` - It takes the config struct as an argument.
+/// * `setting_value` - It takes the cookie struct as an argument.
+///
+/// # Error
+///
+/// returns a standard error message on failure otherwise it returns the unit type.
+async fn sanitize(
+    config: web::Data<&'static Config>,
+    setting_value: &mut models::server_models::Cookie<'_>,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Check whether the theme, colorscheme and animation option is valid by matching it against
+    // the available option list. If the option provided by the user via the JSON file is invalid
+    // then replace the user provided by the default one used by the server via the config file.
+
+    if !style_option_list("themes")
+        .await?
+        .contains(&setting_value.theme)
+    {
+        setting_value.theme = Cow::Borrowed(&config.style.theme)
+    } else if !style_option_list("colorschemes")
+        .await?
+        .contains(&setting_value.colorscheme)
+    {
+        setting_value.colorscheme = Cow::Borrowed(&config.style.colorscheme)
+    } else if !style_option_list("animations")
+        .await?
+        .contains(setting_value.animation.as_ref().unwrap())
+    {
+        setting_value.animation = config
+            .style
+            .animation
+            .as_ref()
+            .map(|str| Cow::Borrowed(str.as_str()));
+    }
+
+    // Filters out any engines in the list that are invalid by matching each engine against the
+    // available engine list.
+    let engines: Vec<_> = setting_value
+        .engines
+        .iter()
+        .cloned()
+        .filter_map(|engine| {
+            config
+                .upstream_search_engines
+                .keys()
+                .cloned()
+                .any(|other_engine| *engine == other_engine)
+                .then_some(engine.clone())
+        })
+        .collect();
+    setting_value.engines = Cow::Owned(engines);
+
+    setting_value.safe_search_level = match setting_value.safe_search_level {
+        0..2 => setting_value.safe_search_level,
+        _ => u8::default(),
+    };
+
+    Ok(())
+}
+
+/// A multipart struct which stores user provided input file data in memory.
+#[derive(MultipartForm)]
+struct File {
+    /// It stores the input file data in memory.
+    file: TempFile,
+}
+
+/// Handles the route of the post settings page.
+#[post("/settings")]
+pub async fn set_settings(
+    config: web::Data<&'static Config>,
+    MultipartForm(mut form): MultipartForm<File>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    if let Some(file_name) = form.file.file_name {
+        let file_name_parts = file_name.split(".");
+        if let 2 = file_name_parts.clone().count() {
+            if let Some("json") = file_name_parts.last() {
+                if let 0 = form.file.size {
+                    return Ok(HttpResponse::BadRequest().finish());
+                } else {
+                    let mut data = String::new();
+                    form.file.file.read_to_string(&mut data).unwrap();
+
+                    let mut unsanitized_json_data: models::server_models::Cookie<'_> =
+                        serde_json::from_str(&data)?;
+
+                    sanitize(config, &mut unsanitized_json_data).await?;
+
+                    let sanitized_json_data: String =
+                        serde_json::json!(unsanitized_json_data).to_string();
+
+                    return Ok(HttpResponse::Ok()
+                        .cookie(
+                            Cookie::build("appCookie", sanitized_json_data)
+                                .expires(
+                                    OffsetDateTime::now_utc().saturating_add(Duration::weeks(52)),
+                                )
+                                .finish(),
+                        )
+                        .finish());
+                }
+            }
+        }
+    }
+    Ok(HttpResponse::Ok().finish())
+}
+
+/// Handles the route of the download page.
+#[get("/download")]
+pub async fn download(
+    config: web::Data<&'static Config>,
+    req: HttpRequest,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let cookie = req.cookie("appCookie");
+
+    // Get search settings using the user's cookie or from the server's config
+    let preferences: server_models::Cookie<'_> = cookie
+        .as_ref()
+        .and_then(|cookie_value| serde_json::from_str(cookie_value.value()).ok())
+        .unwrap_or_else(|| {
+            server_models::Cookie::build(
+                &config.style,
+                config
+                    .upstream_search_engines
+                    .iter()
+                    .filter_map(|(engine, enabled)| {
+                        enabled.then_some(Cow::Borrowed(engine.as_str()))
+                    })
+                    .collect(),
+                u8::default(),
+            )
+        });
+
+    Ok(HttpResponse::Ok().json(preferences))
+}
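
Taken together, `#[get("/download")]` exports the current preferences as JSON, and `#[post("/settings")]` accepts them back as a multipart upload whose field is named `file` and whose filename must carry exactly one `.json` extension. A hedged client-side sketch of that round trip (the host and port are placeholders, and it assumes a reqwest build with the `multipart` feature plus tokio):

```rust
use std::error::Error;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let client = reqwest::Client::new();

    // GET /download returns the current preferences as JSON...
    let prefs = client
        .get("http://127.0.0.1:8080/download")
        .send()
        .await?
        .text()
        .await?;

    // ...which can later be POSTed back to /settings as a multipart part
    // named "file"; "settings.json" satisfies the two-part filename check.
    let part = reqwest::multipart::Part::text(prefs).file_name("settings.json");
    let form = reqwest::multipart::Form::new().part("file", part);
    client
        .post("http://127.0.0.1:8080/settings")
        .multipart(form)
        .send()
        .await?;
    Ok(())
}
```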
src/server/routes/mod.rs
CHANGED
@@ -1,3 +1,4 @@
 //! This module provides modules to handle various routes in the search engine website.

+pub mod export_import;
 pub mod search;
src/server/routes/search.rs
CHANGED
@@ -12,8 +12,10 @@ use crate::{
     results::aggregator::aggregate,
 };
 use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
+use itertools::Itertools;
 use regex::Regex;
-use std::…
+use std::time::{SystemTime, UNIX_EPOCH};
+use std::{borrow::Cow, time::Duration};
 use tokio::{
     fs::File,
     io::{AsyncBufReadExt, BufReader},
@@ -40,7 +42,6 @@ pub async fn search(
     config: web::Data<&'static Config>,
     cache: web::Data<&'static SharedCache>,
 ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    use std::sync::Arc;
     let params = web::Query::<SearchParams>::from_query(req.query_string())?;
     match &params.q {
         Some(query) => {
@@ -54,6 +55,7 @@ pub async fn search(

             // Get search settings using the user's cookie or from the server's config
             let mut search_settings: server_models::Cookie<'_> = cookie
+                .as_ref()
                 .and_then(|cookie_value| serde_json::from_str(cookie_value.value()).ok())
                 .unwrap_or_else(|| {
                     server_models::Cookie::build(
@@ -83,44 +85,41 @@ pub async fn search(
             let previous_page = page.saturating_sub(1);
             let next_page = page + 1;

-            …
+            // Add a random delay before making the request.
+            if config.aggregator.random_delay || config.debug {
+                let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
+                let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
+                tokio::time::sleep(Duration::from_secs(delay)).await;
+            }
+
+            let results: (SearchResults, String, bool);
             if page != previous_page {
                 let (previous_results, current_results, next_results) = join!(
                     get_results(previous_page),
                     get_results(page),
                     get_results(next_page)
                 );
-                let (parsed_previous_results, parsed_next_results) =
-                    (previous_results?, next_results?);

-                …
-                    [
-                        parsed_previous_results.1,
-                        results.1.clone(),
-                        parsed_next_results.1,
-                    ],
-                    [
-                        parsed_previous_results.0,
-                        results.0.clone(),
-                        parsed_next_results.0,
-                    ],
-                );
+                results = current_results?;

-                …
+                let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
+                    [previous_results?, results.clone(), next_results?]
+                        .into_iter()
+                        .filter_map(|(result, cache_key, flag)| flag.then_some((result, cache_key)))
+                        .multiunzip();

                 tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
             } else {
                 let (current_results, next_results) =
                     join!(get_results(page), get_results(page + 1));

-                …
+                results = current_results?;

-                …
-                …
+                let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
+                    [results.clone(), next_results?]
+                        .into_iter()
+                        .filter_map(|(result, cache_key, flag)| flag.then_some((result, cache_key)))
+                        .multiunzip();

                 tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
             }
@@ -131,6 +130,7 @@ pub async fn search(
                 &config.style.theme,
                 &config.style.animation,
                 query,
+                page,
                 &results.0,
             )
             .0,
@@ -148,7 +148,7 @@ pub async fn search(
 /// # Arguments
 ///
 /// * `url` - It takes the url of the current page that requested the search results for a
-///
+/// particular search query.
 /// * `config` - It takes a parsed config struct.
 /// * `query` - It takes the page number as u32 value.
 /// * `req` - It takes the `HttpRequest` struct as a value.
@@ -163,7 +163,7 @@ async fn results(
     query: &str,
     page: u32,
     search_settings: &server_models::Cookie<'_>,
-) -> Result<(SearchResults, String), Box<dyn std::error::Error>> {
+) -> Result<(SearchResults, String, bool), Box<dyn std::error::Error>> {
     // eagerly parse cookie value to evaluate safe search level
     let safe_search_level = search_settings.safe_search_level;

@@ -182,7 +182,7 @@ async fn results(
     // check if fetched cache results was indeed fetched or it was an error and if so
     // handle the data accordingly.
     match cached_results {
-        Ok(results) => Ok((results, cache_key)),
+        Ok(results) => Ok((results, cache_key, false)),
         Err(_) => {
             if safe_search_level == 4 {
                 let mut results: SearchResults = SearchResults::default();
@@ -196,7 +196,7 @@ async fn results(
                     .cache_results(&[results.clone()], &[cache_key.clone()])
                     .await?;
                 results.set_safe_search_level(safe_search_level);
-                return Ok((results, cache_key));
+                return Ok((results, cache_key, true));
             }
         }
@@ -235,7 +235,7 @@ async fn results(
             .cache_results(&[results.clone()], &[cache_key.clone()])
             .await?;
         results.set_safe_search_level(safe_search_level);
-        Ok((results, cache_key))
+        Ok((results, cache_key, true))
     }
 }
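
The caching path now tags each result tuple with a bool that is true only when the results were freshly aggregated (a cache hit returns `false`), then uses itertools' `multiunzip` to split the kept tuples into the parallel lists `cache_results` expects. A self-contained sketch of that filter-and-split step:

```rust
use itertools::Itertools;

fn main() {
    // (result, cache key, freshly-computed flag), as in the route above.
    let triples = vec![
        ("cached".to_string(), "key-a".to_string(), false),
        ("fresh".to_string(), "key-b".to_string(), true),
    ];

    // Drop already-cached entries, then split the survivors into two
    // parallel Vecs in a single pass.
    let (results, keys): (Vec<String>, Vec<String>) = triples
        .into_iter()
        .filter_map(|(result, key, flag)| flag.then_some((result, key)))
        .multiunzip();

    assert_eq!(results, ["fresh"]);
    assert_eq!(keys, ["key-b"]);
}
```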
src/templates/partials/bar.rs
CHANGED
@@ -14,9 +14,13 @@ use maud::{html, Markup, PreEscaped};
 /// It returns the compiled html code for the search bar as a result.
 pub fn bar(query: &str) -> Markup {
     html!(
+        (PreEscaped("<form action=\"/search\">"))
         (PreEscaped("<div class=\"search_bar\">"))
-            input type="search" name="…
-            button type="…
+            input type="search" name="q" value=(query) placeholder="Type to search";
+            button type="button" onclick="clearSearchText()" {
+                img src="./images/close.svg" alt="Clear button icon for clearing search input text";
+            }
+            button type="submit" {
                 img src="./images/magnifying_glass.svg" alt="Info icon for error box";
             }
     )
src/templates/partials/search_bar.rs
CHANGED
@@ -12,7 +12,7 @@ const SAFE_SEARCH_LEVELS_NAME: [&str; 3] = ["None", "Low", "Moderate"];
 /// # Arguments
 ///
 /// * `engine_errors_info` - It takes the engine errors list containing errors for each upstream
-///
+/// search engine which failed to provide results as an argument.
 /// * `safe_search_level` - It takes the safe search level with values from 0-2 as an argument.
 /// * `query` - It takes the current search query provided by user as an argument.
 ///
@@ -29,7 +29,7 @@ pub fn search_bar(
         (bar(query))
         .error_box {
             @if !engine_errors_info.is_empty(){
-                button onclick="toggleErrorBox()" class="error_box_toggle_button"{
+                button type="button" onclick="toggleErrorBox()" class="error_box_toggle_button"{
                     img src="./images/warning.svg" alt="Info icon for error box";
                 }
                 .dropdown_error_box{
@@ -43,7 +43,7 @@ pub fn search_bar(
                 }
             }
             @else {
-                button onclick="toggleErrorBox()" class="error_box_toggle_button"{
+                button type="button" onclick="toggleErrorBox()" class="error_box_toggle_button"{
                     img src="./images/info.svg" alt="Warning icon for error box";
                 }
                 .dropdown_error_box {
@@ -56,10 +56,10 @@ pub fn search_bar(
         (PreEscaped("</div>"))
         .search_options {
             @if safe_search_level >= 3 {
-                (PreEscaped("<select name=\"…
+                (PreEscaped("<select name=\"safesearch\" disabled>"))
             }
             @else{
-                (PreEscaped("<select name=\"…
+                (PreEscaped(format!("<select name=\"safesearch\" value=\"{}\">", safe_search_level)))
             }
             @for (idx, name) in SAFE_SEARCH_LEVELS_NAME.iter().enumerate() {
                 @if (safe_search_level as usize) == idx {
@@ -71,6 +71,7 @@ pub fn search_bar(
             }
             (PreEscaped("</select>"))
         }
+        (PreEscaped("</form>"))
     }
     )
 }
src/templates/partials/settings_tabs/engines.rs
CHANGED
@@ -9,7 +9,7 @@ use maud::{html, Markup};
 /// # Arguments
 ///
 /// * `engine_names` - It takes the key value pair list of all available engine names and there corresponding
-///
+/// selected (enabled/disabled) value as an argument.
 ///
 /// # Returns
 ///
src/templates/partials/settings_tabs/general.rs
CHANGED
@@ -37,6 +37,21 @@ pub fn general(safe_search_level: u8) -> Markup {
                 option value=(SAFE_SEARCH_LEVELS[2].0){(SAFE_SEARCH_LEVELS[2].1)}
             }
         }
+        h3{"Restore preferences from file"}
+        p class="description"{
+            "Select a json file to restore preferences for the search engine."
+        }
+        form method="post" enctype="multipart/form-data" onsubmit="setTimeout(function () { window.location.reload(); }, 10)" action="/settings" target="_self"{
+            input type="file" name="file" accept="application/json";
+            button type="submit" {"Submit"}
+        }
+        h3{"Export preferences to a file"}
+        p class="description"{
+            "Export all the settings saved as a cookie as a json file."
+        }
+        a class="export_btn" href="./download" download="settings.json" {
+            button type="submit" {"Export"}
+        }
     }
     )
 }
src/templates/partials/settings_tabs/user_interface.rs
CHANGED
@@ -11,9 +11,9 @@ use std::fs::read_dir;
 /// # Arguments
 ///
 /// * `style_type` - It takes the style type of the values `theme` and `colorscheme` as an
-///
+/// argument.
 /// * `selected_style` - It takes the currently selected style value provided via the config file
-///
+/// as an argument.
 ///
 /// # Error
 ///
src/templates/views/search.rs
CHANGED
@@ -24,6 +24,7 @@ pub fn search(
     theme: &str,
     animation: &Option<String>,
     query: &str,
+    page: u32,
     search_results: &SearchResults,
 ) -> Markup {
     html!(
@@ -108,15 +109,14 @@ pub fn search(
             }
         }
         .page_navigation {
-            …
+            a href=(format!("/search?q={}&safesearch={}&page={}", query, search_results.safe_search_level, if page > 1 {page-1} else {1})) {
                 (PreEscaped("←")) "previous"
             }
-            …
+            a href=(format!("/search?q={}&safesearch={}&page={}", query, search_results.safe_search_level, page+2)) {
+                "next" (PreEscaped("→"))}
         }
     }
     script src="static/index.js"{}
-    script src="static/search_area_options.js"{}
-    script src="static/pagination.js"{}
     script src="static/error_box.js"{}
     (footer())
 )
websurfx/config.lua
CHANGED
@@ -10,7 +10,7 @@ production_use = false -- whether to use production mode or not (in other words
 -- if production_use is set to true
 -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
 request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
-
+tcp_connection_keep_alive = 30 -- the amount of time the tcp connection should remain alive to the upstream search engines (or connected to the server). (value in seconds).
 pool_idle_connection_timeout = 30 -- timeout for the idle connections in the reqwest HTTP connection pool (value in seconds).
 rate_limiter = {
     number_of_requests = 50, -- The number of request that are allowed within a provided time limit.
@@ -19,6 +19,12 @@ rate_limiter = {
 -- Set whether the server will use an adaptive/dynamic HTTPS window size, see https://httpwg.org/specs/rfc9113.html#fc-principles
 https_adaptive_window_size = false

+operating_system_tls_certificates = true -- Set whether the server will use operating system's tls certificates alongside rustls certificates while fetching search results from the upstream engines.
+
+number_of_https_connections = 10 -- the number of https connections that should be available in the connection pool.
+-- Set keep-alive timer in seconds; keeps clients connected to the HTTP server, different from the connection to upstream search engines
+client_connection_keep_alive = 120
+
 -- ### Search ###
 -- Filter results based on different levels. The levels provided are:
 -- {{
@@ -70,4 +76,7 @@ upstream_search_engines = {
     LibreX = false,
     Mojeek = false,
     Bing = false,
+    Wikipedia = true,
 } -- select the upstream search engines from which the results should be fetched.
+
+proxy = nil -- Proxy to send outgoing requests through. Set to nil to disable.
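
The new `proxy` option feeds the conditional `cb.proxy(...)` call in the aggregator. The config-parser changes (`src/config/parser.rs`) are not shown in this section, so the following is only a plausible sketch of mapping the Lua value onto `reqwest::Proxy`; a `nil`/absent value becomes `None`, and socks5 URLs additionally require reqwest's `socks` feature:

```rust
use reqwest::Proxy;

// Hypothetical helper: `value` stands in for the string read from config.lua.
fn parse_proxy(value: Option<&str>) -> Option<Proxy> {
    // Proxy::all accepts any scheme reqwest supports (http, https, socks5)
    // and applies the proxy to all outgoing requests.
    value.and_then(|url| Proxy::all(url).ok())
}

fn main() {
    assert!(parse_proxy(None).is_none());
    assert!(parse_proxy(Some("http://127.0.0.1:8080")).is_some());
}
```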