MariaK committed on
Commit
70be46f
·
1 Parent(s): 0010d1f

Update src/lib/server/websearch/runWebSearch.ts

Browse files
src/lib/server/websearch/runWebSearch.ts CHANGED
@@ -10,6 +10,7 @@ import {
10
  } from "$lib/server/websearch/sentenceSimilarity";
11
  import type { Conversation } from "$lib/types/Conversation";
12
  import type { MessageUpdate } from "$lib/types/MessageUpdate";
 
13
 
14
  const MAX_N_PAGES_SCRAPE = 10 as const;
15
  const MAX_N_PAGES_EMBED = 5 as const;
@@ -17,7 +18,8 @@ const MAX_N_PAGES_EMBED = 5 as const;
17
  export async function runWebSearch(
18
  conv: Conversation,
19
  prompt: string,
20
- updatePad: (upd: MessageUpdate) => void
 
21
  ) {
22
  const messages = (() => {
23
  return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
@@ -26,6 +28,7 @@ export async function runWebSearch(
26
  const webSearch: WebSearch = {
27
  prompt: prompt,
28
  searchQuery: "",
 
29
  results: [],
30
  context: "",
31
  contextSources: [],
@@ -33,12 +36,40 @@ export async function runWebSearch(
33
  updatedAt: new Date(),
34
  };
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
37
  updatePad({ type: "webSearch", messageType: type ?? "update", message: message, args: args });
38
  }
39
 
40
  try {
41
  webSearch.searchQuery = await generateQuery(messages);
 
 
42
  appendUpdate("Searching Google", [webSearch.searchQuery]);
43
  const results = await searchWeb(webSearch.searchQuery);
44
  webSearch.results =
@@ -63,7 +94,7 @@ export async function runWebSearch(
63
  text = await parseWeb(link);
64
  appendUpdate("Browsing webpage", [link]);
65
  } catch (e) {
66
- // ignore errors
67
  }
68
  const MAX_N_CHUNKS = 100;
69
  const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
@@ -111,4 +142,4 @@ export async function runWebSearch(
111
  }
112
 
113
  return webSearch;
114
- }
 
10
  } from "$lib/server/websearch/sentenceSimilarity";
11
  import type { Conversation } from "$lib/types/Conversation";
12
  import type { MessageUpdate } from "$lib/types/MessageUpdate";
13
+ import { webSearchParameters } from "$lib/stores/webSearchParameters";
14
 
15
  const MAX_N_PAGES_SCRAPE = 10 as const;
16
  const MAX_N_PAGES_EMBED = 5 as const;
 
18
  export async function runWebSearch(
19
  conv: Conversation,
20
  prompt: string,
21
+ updatePad: (upd: MessageUpdate) => void,
22
+ domainFiltersStr: string
23
  ) {
24
  const messages = (() => {
25
  return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
 
28
  const webSearch: WebSearch = {
29
  prompt: prompt,
30
  searchQuery: "",
31
+ domainFilters: [],
32
  results: [],
33
  context: "",
34
  contextSources: [],
 
36
  updatedAt: new Date(),
37
  };
38
 
39
+
40
/**
 * Extracts the valid domain names from a free-form filter string.
 *
 * Entries may be separated by commas and/or any whitespace (spaces, tabs,
 * newlines). Tokens that are not well-formed host names are dropped, and
 * duplicates (compared case-insensitively) are returned only once, keeping
 * the first spelling and the original order.
 *
 * @param domainFiltersStr - Raw user-provided list of domains.
 * @returns The unique, syntactically valid domains in input order.
 */
function extractDomains(domainFiltersStr: string): string[] {
  // Each label must start and end with an alphanumeric character, so inputs
  // such as "..com" or "-bad.com" are rejected instead of becoming a bogus
  // `site:` filter. The final label (TLD) is letters only, at least two.
  const domainPattern = /^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}$/;
  const seen = new Set<string>();
  const domains: string[] = [];

  // Split on any whitespace as well as commas so multi-line input works.
  for (const part of domainFiltersStr.split(/[\s,]+/)) {
    if (!domainPattern.test(part)) {
      continue;
    }
    // Host names are case-insensitive; avoid emitting the same filter twice.
    const key = part.toLowerCase();
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    domains.push(part);
  }

  return domains;
}
52
+
53
+ webSearch.domainFilters = extractDomains(domainFiltersStr);
54
+
55
/**
 * Builds the `site:` prefix that restricts a search query to the given domains.
 *
 * Blank entries are ignored so the prefix never contains a dangling `site:`
 * operator. The result ends with a single space so it can be prepended
 * directly to the query text.
 *
 * @param domainFilters - Domains to restrict the search to.
 * @returns `"site:a OR site:b "` style prefix, or `""` when there is nothing to filter on.
 */
function formatSearchQuery(domainFilters: readonly string[]): string {
  const siteClauses = domainFilters
    .map((domain) => domain.trim())
    .filter((domain) => domain.length > 0)
    .map((domain) => `site:${domain}`);

  if (siteClauses.length === 0) {
    return "";
  }

  return `${siteClauses.join(" OR ")} `;
}
64
+
65
  function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
66
  updatePad({ type: "webSearch", messageType: type ?? "update", message: message, args: args });
67
  }
68
 
69
  try {
70
  webSearch.searchQuery = await generateQuery(messages);
71
+ // limit the sources to certain sites
72
+ webSearch.searchQuery = formatSearchQuery(webSearch.domainFilters) + webSearch.searchQuery;
73
  appendUpdate("Searching Google", [webSearch.searchQuery]);
74
  const results = await searchWeb(webSearch.searchQuery);
75
  webSearch.results =
 
94
  text = await parseWeb(link);
95
  appendUpdate("Browsing webpage", [link]);
96
  } catch (e) {
97
+ console.error(`Error parsing webpage "${link}"`, e);
98
  }
99
  const MAX_N_CHUNKS = 100;
100
  const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
 
142
  }
143
 
144
  return webSearch;
145
+ }