Spaces:
Runtime error
Runtime error
Fixed bugs
Browse files
app.py
CHANGED
@@ -12,6 +12,11 @@ from string import punctuation
|
|
12 |
API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
|
13 |
headers = {"Authorization": "Bearer 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"}
|
14 |
|
|
|
|
|
|
|
|
|
|
|
15 |
def read_text(filename, filetype='txt'):
|
16 |
words = []
|
17 |
|
@@ -31,11 +36,6 @@ lookup_words = read_text('lookup_words')
|
|
31 |
obj_pronouns = read_text('obj_pronouns')
|
32 |
profanities = read_text('profanities', 'json')
|
33 |
|
34 |
-
def query(text):
|
35 |
-
text = {"inputs": text}
|
36 |
-
response = requests.post(API_URL, headers=headers, json=text)
|
37 |
-
return response.json()
|
38 |
-
|
39 |
|
40 |
# for profanity in profanities:
|
41 |
# print(profanity, process.extractOne(profanity, tweet.split(), scorer=fuzz.ratio))
|
@@ -77,7 +77,7 @@ def fuzzy_lookup(tweet):
|
|
77 |
for pronoun in obj_pronouns:
|
78 |
if len(word_split) > 1:
|
79 |
if pronoun == word_split[-1]:
|
80 |
-
matched_profanity[word] =
|
81 |
break
|
82 |
|
83 |
# Replace each profanities by fuzzy lookup result
|
@@ -120,28 +120,30 @@ def preprocess(tweet):
|
|
120 |
# Combine list of words back to sentence
|
121 |
preprocessed_tweet = ' '.join(filter(None, row_split))
|
122 |
|
|
|
|
|
|
|
123 |
# Check if output contains single word then return null
|
124 |
if len(preprocessed_tweet.split()) == 1:
|
125 |
-
return preprocessed_tweet
|
126 |
|
127 |
# Expand Contractions
|
128 |
for i in contractions.items():
|
129 |
preprocessed_tweet = re.sub(rf"\b{i[0]}\b", i[1], preprocessed_tweet)
|
130 |
|
131 |
-
# Fuzzy Lookup
|
132 |
-
preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)
|
133 |
-
|
134 |
return preprocessed_tweet, matches
|
135 |
|
136 |
|
137 |
def predict(tweet):
|
138 |
-
|
139 |
preprocessed_tweet, matched_profanity = preprocess(tweet)
|
140 |
|
141 |
prediction = query(preprocessed_tweet)
|
142 |
|
143 |
if type(prediction) is dict:
|
144 |
-
return "Model is still loading. Try again."
|
|
|
|
|
145 |
|
146 |
if bool(matched_profanity) == False:
|
147 |
return "No profanity found."
|
@@ -149,9 +151,9 @@ def predict(tweet):
|
|
149 |
prediction = [tuple(i.values()) for i in prediction[0]]
|
150 |
prediction = dict((x, y) for x, y in prediction)
|
151 |
|
152 |
-
print("\
|
153 |
-
print(matched_profanity)
|
154 |
-
print(prediction, "\n")
|
155 |
|
156 |
return prediction
|
157 |
|
@@ -189,18 +191,20 @@ def predict(tweet):
|
|
189 |
# # hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
|
190 |
|
191 |
|
192 |
-
demo = gr.Interface(
|
193 |
-
|
|
|
|
|
194 |
|
195 |
-
|
196 |
|
197 |
-
|
|
|
|
|
|
|
|
|
|
|
198 |
|
199 |
-
|
200 |
-
'Napakainit ngayong araw pakshet namaaan!!',
|
201 |
-
'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
|
202 |
-
'Bobo ka ba? napakadali lang nyan eh... ๐คก',
|
203 |
-
'Uy gago laptrip yung nangyare samen kanina HAHAHA๐๐'],
|
204 |
-
)
|
205 |
|
206 |
-
|
|
|
12 |
API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
|
13 |
headers = {"Authorization": "Bearer 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"}
|
14 |
|
15 |
+
def query(text):
    """Send *text* to the hosted inference API and return the parsed JSON reply.

    Args:
        text: The (preprocessed) tweet to classify.

    Returns:
        The decoded JSON response — normally a list of label/score entries,
        or a dict (e.g. an "error" payload) while the model is still loading.
    """
    # Use a separate name for the payload instead of shadowing the parameter.
    payload = {"inputs": text}
    # A bounded timeout keeps the app from hanging forever if the API stalls.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
    return response.json()
|
19 |
+
|
20 |
def read_text(filename, filetype='txt'):
|
21 |
words = []
|
22 |
|
|
|
36 |
obj_pronouns = read_text('obj_pronouns')
|
37 |
profanities = read_text('profanities', 'json')
|
38 |
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
# for profanity in profanities:
|
41 |
# print(profanity, process.extractOne(profanity, tweet.split(), scorer=fuzz.ratio))
|
|
|
77 |
for pronoun in obj_pronouns:
|
78 |
if len(word_split) > 1:
|
79 |
if pronoun == word_split[-1]:
|
80 |
+
matched_profanity[word] = profanity + ' ' + pronoun
|
81 |
break
|
82 |
|
83 |
# Replace each profanities by fuzzy lookup result
|
|
|
120 |
# Combine list of words back to sentence
|
121 |
preprocessed_tweet = ' '.join(filter(None, row_split))
|
122 |
|
123 |
+
# Fuzzy Lookup
|
124 |
+
preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)
|
125 |
+
|
126 |
# Check if output contains single word then return null
|
127 |
if len(preprocessed_tweet.split()) == 1:
|
128 |
+
return preprocessed_tweet, matches
|
129 |
|
130 |
# Expand Contractions
|
131 |
for i in contractions.items():
|
132 |
preprocessed_tweet = re.sub(rf"\b{i[0]}\b", i[1], preprocessed_tweet)
|
133 |
|
|
|
|
|
|
|
134 |
return preprocessed_tweet, matches
|
135 |
|
136 |
|
137 |
def predict(tweet):
    """Classify *tweet* for profanity using the remote inference model.

    Args:
        tweet: Raw tweet text.

    Returns:
        "No profanity found." when no profanity was matched during
        preprocessing, a retry message string if the model never finishes
        loading, otherwise a mapping of model labels to scores.
    """
    preprocessed_tweet, matched_profanity = preprocess(tweet)

    prediction = query(preprocessed_tweet)

    # A dict response means the model is still warming up.  The original
    # code recursed here but dropped the return value, so callers always
    # received None during warm-up; propagate the retry's result instead.
    if isinstance(prediction, dict):
        print("loading")
        return predict(tweet)

    if not matched_profanity:
        return "No profanity found."

    # Flatten the API's list-of-dicts payload into a single {label: score}
    # mapping (each inner dict carries exactly a label and its score).
    prediction = dict(tuple(entry.values()) for entry in prediction[0])

    print("\nTWEET:", tweet)
    print("DETECTED PROFANITY:", matched_profanity)
    print("LABELS:", prediction, "\n")

    return prediction
|
159 |
|
|
|
191 |
# # hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
|
192 |
|
193 |
|
194 |
+
# demo = gr.Interface(
|
195 |
+
# fn=predict,
|
196 |
+
|
197 |
+
# inputs=[gr.components.Textbox(lines=5, placeholder='Enter your input here', label='INPUT')],
|
198 |
|
199 |
+
# outputs=[gr.components.Label(num_top_classes=2, label="PREDICTION")],
|
200 |
|
201 |
+
# examples=['Tangina mo naman sobrang yabang mo gago!!๐ ๐ค @davidrafael',
|
202 |
+
# 'Napakainit ngayong araw pakshet namaaan!!',
|
203 |
+
# 'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
|
204 |
+
# 'Bobo ka ba? napakadali lang nyan eh... ๐คก',
|
205 |
+
# 'Uy gago laptrip yung nangyare samen kanina HAHAHA๐๐'],
|
206 |
+
# )
|
207 |
|
208 |
+
# demo.launch()
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
+
# Smoke-test the pipeline only when executed as a script, so merely
# importing this module does not fire a network request to the API.
if __name__ == "__main__":
    predict("asdasd kgjhgjhgj")
|