import nlp from 'compromise'; | |
/**
 * Reduces a word to a lowercase, ASCII-only (`a`-`z`) key.
 *
 * Steps:
 * 1. When `language` is `'en'`, the word is passed through compromise's
 *    `nouns().toSingular()`; if that yields an empty result the original
 *    word is kept.
 * 2. German umlauts and sharp s are transliterated to their conventional
 *    two-letter spellings (`ü`→`ue`, `ä`→`ae`, `ö`→`oe`, `ß`→`ss`). This is
 *    applied regardless of `language`.
 * 3. All remaining diacritics are stripped and every character outside
 *    `a`-`z` (digits, whitespace, punctuation, …) is removed.
 *
 * @param word - The raw word to normalize.
 * @param language - Language code; only `'en'` triggers singularization.
 * @returns The normalized word, possibly an empty string.
 */
export const normalizeWord = (word: string, language: string = 'en'): string => {
  let processedWord = word;

  // Only apply compromise for English
  if (language === 'en') {
    const singular = nlp(word).nouns().toSingular().out('text');
    // Fall back to the input when compromise doesn't produce output
    if (singular) {
      processedWord = singular;
    }
  }

  return (
    processedWord
      // Compose first so that decomposed input (e.g. "u" + U+0308) becomes a
      // single precomposed code point that the umlaut patterns below can match.
      .normalize('NFC')
      .toLowerCase()
      // Transliterate German umlauts BEFORE stripping diacritics; once the
      // combining marks are removed the umlauts are indistinguishable from
      // their base vowels and these replacements could never match.
      .replace(/\u00fc/g, 'ue') // ü
      .replace(/\u00e4/g, 'ae') // ä
      .replace(/\u00f6/g, 'oe') // ö
      .replace(/\u00df/g, 'ss') // ß
      // Decompose what is left and drop the combining marks (é -> e, ñ -> n, …)
      .normalize('NFD')
      .replace(/[\u0300-\u036f]/g, '')
      // Keep only a-z; this also removes all whitespace, so no trim is needed
      .replace(/[^a-z]/g, '')
  );
};