import { promises as fs } from "node:fs"

type GoodWordsDB = Record<string, Record<string, string[]>>
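// Rough shape of ./data/good_words.json implied by the type above (illustrative only, not verified):
//   { "<category>": { "<word>": ["...", "..."] } }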

// import ngrams from "talisman/tokenizers/ngrams"

// In-memory cache of the parsed good-words database.
let db: GoodWordsDB = {}

// Normalized words from every accepted category, exposed as a Set for fast lookups
// and as an array for ordered iteration.
export let goodWords: Set<string> = new Set()
export let goodWordsList: string[] = []

export const getGoodWords = async () => {
  // Load and parse the word database only on the first call; later calls reuse the cached `db`.
  if (Object.keys(db).length === 0) {
    const dbFileContent = await fs.readFile("./data/good_words.json", "utf8")

    db = JSON.parse(dbFileContent) as GoodWordsDB

    // we don't want those categories to be part of the acceptable, fair use words
    const unwantedCategories: Record<string, boolean> = {
      celeb: true,
    }

    for (const [category, words] of Object.entries(db)) {
      if (unwantedCategories[category]) {
        continue
      }

      for (const word in words) {
        const normalizedWord = word.trim().toLowerCase()
        // Skip duplicates that can appear across categories so the list stays in sync with the Set.
        if (goodWords.has(normalizedWord)) continue
        goodWords.add(normalizedWord)
        goodWordsList.push(normalizedWord)
      }
    }
  }

  return { db, goodWords, goodWordsList }
}
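
// Example usage (a minimal sketch; the "./goodWords" module path and the relative
// location of ./data/good_words.json are assumptions about the surrounding project):
//
//   import { getGoodWords } from "./goodWords"
//
//   const { goodWords, goodWordsList } = await getGoodWords()
//   console.log(`${goodWordsList.length} acceptable words loaded`)
//   console.log(goodWords.has("example")) // constant-time membership check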