Spaces:
Runtime error
Runtime error
Remove trailing punctuation
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import json
|
|
6 |
from thefuzz import process, fuzz
|
7 |
import numpy as np
|
8 |
import re
|
9 |
-
import
|
10 |
|
11 |
|
12 |
API_URL = "https://api-inference.huggingface.co/models/Dabid/test2"
|
@@ -39,7 +39,8 @@ def fuzzy_lookup(tweet):
|
|
39 |
# Loop over each word in the tweet
|
40 |
for word in tweet.split():
|
41 |
# Remove punctuation
|
42 |
-
|
|
|
43 |
# Keep only letters, digits and '@', then lowercase
|
44 |
processed_word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
|
45 |
scores = []
|
@@ -55,15 +56,15 @@ def fuzzy_lookup(tweet):
|
|
55 |
if len(scores) > 0:
|
56 |
max_score_index = np.argmax(scores)
|
57 |
if matched_words[max_score_index] in lookup_profanity:
|
58 |
-
matches[
|
59 |
|
60 |
|
61 |
-
for
|
62 |
-
word_split =
|
63 |
for pronoun in obj_pronouns:
|
64 |
if len(word_split) > 1:
|
65 |
if pronoun == word_split[-1]:
|
66 |
-
matches[
|
67 |
break
|
68 |
|
69 |
# Replace each profanity with its fuzzy lookup result
|
|
|
6 |
from thefuzz import process, fuzz
|
7 |
import numpy as np
|
8 |
import re
|
9 |
+
from string import punctuation
|
10 |
|
11 |
|
12 |
API_URL = "https://api-inference.huggingface.co/models/Dabid/test2"
|
|
|
39 |
# Loop over each word in the tweet
|
40 |
for word in tweet.split():
|
41 |
# Remove punctuation
|
42 |
+
word = word.strip(punctuation)
|
43 |
+
|
44 |
# Keep only letters, digits and '@', then lowercase
|
45 |
processed_word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
|
46 |
scores = []
|
|
|
56 |
if len(scores) > 0:
|
57 |
max_score_index = np.argmax(scores)
|
58 |
if matched_words[max_score_index] in lookup_profanity:
|
59 |
+
matches[word] = matched_words[max_score_index]
|
60 |
|
61 |
|
62 |
+
for word, matched_profanity in matches.items():
|
63 |
+
word_split = word.split(matched_profanity[-2:])
|
64 |
for pronoun in obj_pronouns:
|
65 |
if len(word_split) > 1:
|
66 |
if pronoun == word_split[-1]:
|
67 |
+
matches[word] = matched_profanity + ' ' + pronoun
|
68 |
break
|
69 |
|
70 |
# Replace each profanity with its fuzzy lookup result
|