Spaces:
Sleeping
Sleeping
Update src/lib/server/websearch/runWebSearch.ts
Browse files
src/lib/server/websearch/runWebSearch.ts
CHANGED
@@ -10,6 +10,7 @@ import {
|
|
10 |
} from "$lib/server/websearch/sentenceSimilarity";
|
11 |
import type { Conversation } from "$lib/types/Conversation";
|
12 |
import type { MessageUpdate } from "$lib/types/MessageUpdate";
|
|
|
13 |
|
14 |
const MAX_N_PAGES_SCRAPE = 10 as const;
|
15 |
const MAX_N_PAGES_EMBED = 5 as const;
|
@@ -17,7 +18,8 @@ const MAX_N_PAGES_EMBED = 5 as const;
|
|
17 |
export async function runWebSearch(
|
18 |
conv: Conversation,
|
19 |
prompt: string,
|
20 |
-
updatePad: (upd: MessageUpdate) => void
|
|
|
21 |
) {
|
22 |
const messages = (() => {
|
23 |
return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
|
@@ -26,6 +28,7 @@ export async function runWebSearch(
|
|
26 |
const webSearch: WebSearch = {
|
27 |
prompt: prompt,
|
28 |
searchQuery: "",
|
|
|
29 |
results: [],
|
30 |
context: "",
|
31 |
contextSources: [],
|
@@ -33,12 +36,40 @@ export async function runWebSearch(
|
|
33 |
updatedAt: new Date(),
|
34 |
};
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
|
37 |
updatePad({ type: "webSearch", messageType: type ?? "update", message: message, args: args });
|
38 |
}
|
39 |
|
40 |
try {
|
41 |
webSearch.searchQuery = await generateQuery(messages);
|
|
|
|
|
42 |
appendUpdate("Searching Google", [webSearch.searchQuery]);
|
43 |
const results = await searchWeb(webSearch.searchQuery);
|
44 |
webSearch.results =
|
@@ -63,7 +94,7 @@ export async function runWebSearch(
|
|
63 |
text = await parseWeb(link);
|
64 |
appendUpdate("Browsing webpage", [link]);
|
65 |
} catch (e) {
|
66 |
-
|
67 |
}
|
68 |
const MAX_N_CHUNKS = 100;
|
69 |
const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
|
@@ -111,4 +142,4 @@ export async function runWebSearch(
|
|
111 |
}
|
112 |
|
113 |
return webSearch;
|
114 |
-
}
|
|
|
10 |
} from "$lib/server/websearch/sentenceSimilarity";
|
11 |
import type { Conversation } from "$lib/types/Conversation";
|
12 |
import type { MessageUpdate } from "$lib/types/MessageUpdate";
|
13 |
+
import { webSearchParameters } from "$lib/stores/webSearchParameters";
|
14 |
|
15 |
const MAX_N_PAGES_SCRAPE = 10 as const;
|
16 |
const MAX_N_PAGES_EMBED = 5 as const;
|
|
|
18 |
export async function runWebSearch(
|
19 |
conv: Conversation,
|
20 |
prompt: string,
|
21 |
+
updatePad: (upd: MessageUpdate) => void,
|
22 |
+
domainFiltersStr: string
|
23 |
) {
|
24 |
const messages = (() => {
|
25 |
return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
|
|
|
28 |
const webSearch: WebSearch = {
|
29 |
prompt: prompt,
|
30 |
searchQuery: "",
|
31 |
+
domainFilters: [],
|
32 |
results: [],
|
33 |
context: "",
|
34 |
contextSources: [],
|
|
|
36 |
updatedAt: new Date(),
|
37 |
};
|
38 |
|
39 |
+
|
40 |
+
/**
 * Parses a free-form, user-supplied list of domains into validated host names.
 *
 * Entries may be separated by commas and/or any whitespace (spaces, tabs,
 * newlines). Tokens that are not syntactically valid host names are dropped,
 * and duplicates are removed while keeping first-seen order.
 *
 * @param domainFiltersStr raw filter string, e.g. `"example.com, docs.foo.org"`
 * @returns the unique, valid domains in the order they first appear
 */
function extractDomains(domainFiltersStr: string): string[] {
	// One or more dot-terminated labels (alphanumeric, inner hyphens allowed, no
	// leading/trailing hyphen) followed by an alphabetic TLD of 2+ characters.
	// This rejects inputs such as "..com", "-bad.com" or "foo-.com".
	const domainPattern = /^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}$/;

	// Split on any whitespace as well as commas so that lists pasted one per
	// line (or tab-separated) are handled the same as space/comma lists.
	const candidates = domainFiltersStr.split(/[\s,]+/);

	// A Set preserves insertion order, so this de-duplicates without reordering.
	const unique = new Set<string>();
	for (const candidate of candidates) {
		if (domainPattern.test(candidate)) {
			unique.add(candidate);
		}
	}

	return Array.from(unique);
}
|
52 |
+
|
53 |
+
webSearch.domainFilters = extractDomains(domainFiltersStr);
|
54 |
+
|
55 |
+
/**
 * Builds the `site:` restriction that gets prepended to the search query.
 *
 * @param domainFilters domains the search should be limited to
 * @returns an empty string when no domains are given; otherwise the domains
 *          rendered as `site:<domain>` clauses joined by `" OR "`, followed by
 *          a single trailing space so the query text can be appended directly
 */
function formatSearchQuery(domainFilters: string[]): string {
	const siteClauses: string[] = [];
	for (const domain of domainFilters) {
		siteClauses.push(`site:${domain}`);
	}

	// No filters means no prefix at all (not even the separating space).
	return siteClauses.length > 0 ? `${siteClauses.join(" OR ")} ` : "";
}
|
64 |
+
|
65 |
function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
|
66 |
updatePad({ type: "webSearch", messageType: type ?? "update", message: message, args: args });
|
67 |
}
|
68 |
|
69 |
try {
|
70 |
webSearch.searchQuery = await generateQuery(messages);
|
71 |
+
// limit the sources to certain sites
|
72 |
+
webSearch.searchQuery = formatSearchQuery(webSearch.domainFilters) + webSearch.searchQuery;
|
73 |
appendUpdate("Searching Google", [webSearch.searchQuery]);
|
74 |
const results = await searchWeb(webSearch.searchQuery);
|
75 |
webSearch.results =
|
|
|
94 |
text = await parseWeb(link);
|
95 |
appendUpdate("Browsing webpage", [link]);
|
96 |
} catch (e) {
|
97 |
+
console.error(`Error parsing webpage "${link}"`, e);
|
98 |
}
|
99 |
const MAX_N_CHUNKS = 100;
|
100 |
const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
|
|
|
142 |
}
|
143 |
|
144 |
return webSearch;
|
145 |
+
}
|