Spaces:
Runtime error
Runtime error
Fixed profanities in English lookup words
Browse files
app.py
CHANGED
@@ -40,6 +40,7 @@ obj_pronouns = read_text('obj_pronouns')
|
|
40 |
profanities = read_text('profanities', 'json')
|
41 |
eng_words = set(words.words())
|
42 |
|
|
|
43 |
|
44 |
def fuzzy_lookup(tweet):
|
45 |
|
@@ -47,7 +48,7 @@ def fuzzy_lookup(tweet):
|
|
47 |
lookup_profanity = np.concatenate([np.hstack(list(profanities.values())), list(profanities.keys())])
|
48 |
|
49 |
for word in tweet.split():
|
50 |
-
if word in eng_words:
|
51 |
break
|
52 |
scores = []
|
53 |
matched_words = []
|
@@ -66,6 +67,7 @@ def fuzzy_lookup(tweet):
|
|
66 |
if matched_words[max_score_index] in lookup_profanity:
|
67 |
matched_profanity[word] = matched_words[max_score_index]
|
68 |
|
|
|
69 |
for word, profanity in matched_profanity.items():
|
70 |
word_split = word.split(profanity[-2:])
|
71 |
for pronoun in obj_pronouns:
|
|
|
40 |
profanities = read_text('profanities', 'json')
|
41 |
eng_words = set(words.words())
|
42 |
|
43 |
+
# TODO: check English words that are also Tagalog profanities
|
44 |
|
45 |
def fuzzy_lookup(tweet):
|
46 |
|
|
|
48 |
lookup_profanity = np.concatenate([np.hstack(list(profanities.values())), list(profanities.keys())])
|
49 |
|
50 |
for word in tweet.split():
|
51 |
+
if word in list(set(eng_words) - set(lookup_profanity)):
|
52 |
break
|
53 |
scores = []
|
54 |
matched_words = []
|
|
|
67 |
if matched_words[max_score_index] in lookup_profanity:
|
68 |
matched_profanity[word] = matched_words[max_score_index]
|
69 |
|
70 |
+
# Expand Pronouns in Profanities
|
71 |
for word, profanity in matched_profanity.items():
|
72 |
word_split = word.split(profanity[-2:])
|
73 |
for pronoun in obj_pronouns:
|