# Spaces: Runtime error
import os
import re

import emoji
import gradio as gr
import requests
# Inference endpoint that `query` posts the preprocessed text to.
API_URL = "https://api-inference.huggingface.co/models/Dabid/test2"

# SECURITY: an access token should not live in source control. The token is
# read from the HF_API_TOKEN environment variable when it is set; the literal
# below is kept only as a backward-compatible fallback and should be rotated
# and removed once the environment variable is configured.
_HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or "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"

# HTTP headers sent with every request made by `query`.
headers = {"Authorization": f"Bearer {_HF_API_TOKEN}"}
# Words that `predict` replaces with asterisks when a text is labelled
# 'Abusive'. Matching there is case-insensitive and substring-based.
# 'buwesit' is listed because `contractions` treats it as a spelling variant
# of 'bwiset'; without it that variant would be left unmasked.
profanities = [
    'bobo', 'bobong',
    'bwiset', 'bwisit', 'buwisit', 'buwiset', 'bwesit', 'buwesit',
    'gago', 'gagong',
    'kupal',
    'pakshet', 'pakyu',
    'pucha', 'puchang',
    'punyeta', 'punyetang',
    'puta', 'putang', 'putangina', 'putanginang',
    'tanga', 'tangang', 'tangina', 'tanginang',
    'tarantado', 'tarantadong',
    'ulol',
]
# Shortened / colloquial spelling -> expanded form.
# `preprocess` applies these one after another, in insertion order, as
# whole-word (\b...\b) regex substitutions, so the order below is preserved.
contractions = {
    "di": "hindi",
    "to": "ito",
    "no": "ano",
    "kundi": "kung hindi",
    "nya": "niya",
    "nyo": "ninyo",
    "niyo": "ninyo",
    "pano": "paano",
    "sainyo": "sa inyo",
    "sayo": "sa iyo",
    "pag": "kapag",
    "kesa": "kaysa",
    "dun": "doon",
    "ganto": "ganito",
    "nandun": "nandoon",
    "saka": "tsaka",
    "ung": "yung",
    "wag": "huwag",
    "sya": "siya",
    "bat": "bakit",
    "yon": "iyon",
    "yun": "iyon",
    "dyan": "diyan",
    "jan": "diyan",
    "andito": "nandito",
    "tanginamo": "tangina mo",
    "putanginamo": "putangina mo",
    "san": "saan",
    "ganun": "ganoon",
    "gagong": "gago na",
    "bobong": "bobo na",
    "tangang": "tanga na",
    "kelan": "kailan",
    "raw": "daw",
    "tanginang": "tangina na",
    "tarantadong": "tarantado na",
    # Two-word keys: matched across the single space left by tokenisation.
    "putang ina": "putangina",
    "putang inang": "putangina",
    "putanginang": "putangina",
    "itong": "ito ang",
    "lng": "lang",
    # Spelling variants unified to one form.
    "bwisit": "bwiset",
    "bwesit": "bwiset",
    "buwisit": "bwiset",
    "buwesit": "bwiset",
}
# Substrings that identify a "laughing" token; a token containing any of them
# is normalised to the single form 'haha'.
_LAUGH_TEXTS = ('hahaha', 'wahaha', 'hahaa', 'ahha', 'haaha', 'hahah', 'ahah', 'hha')
# Characters that mark a token as a mention or a hashtag.
_SYMBOLS = ('@', '#')


def _clean_token(word):
    """Return the normalised form of one whitespace-delimited token, or '' to drop it."""
    # Removal rules take precedence. Previously the rules were independent
    # `if`s, so the laugh rule could put back a token that had just been
    # removed as a mention, hashtag or link (e.g. '#hahaha' became 'haha').
    if (
        any(symbol in word for symbol in _SYMBOLS)  # @username, #hashtag
        or 'http' in word                           # links
        or any(char.isdigit() for char in word)     # e.g. '4ever'
    ):
        return ''
    if any(laugh in word for laugh in _LAUGH_TEXTS):
        return 'haha'
    return word


def preprocess(row):
    """Normalise raw input text before it is sent to the classifier.

    Steps, in order:
      1. Lowercase the text and strip emojis.
      2. Collapse any run of three or more identical characters to one
         ('grabeee' -> 'grabe'); doubled characters are left alone.
      3. Split on whitespace and clean each token: tokens containing '@',
         '#', 'http' or a digit are dropped, laugh tokens become 'haha'.
      4. If exactly one token is left, return it as is (steps 5-6 are
         skipped for single-word results).
      5. Remove every character that is not an ASCII letter or a space.
      6. Expand the entries of `contractions` as whole words, in dict order.

    Args:
        row: The raw text (a ``str``).

    Returns:
        The cleaned text; an empty string when nothing is left.
    """
    row = row.lower()
    row = emoji.replace_emoji(row, replace='')
    row = re.sub(r'(.)\1{2,}', r'\1', row)

    tokens = [_clean_token(word) for word in row.split()]
    combined_text = ' '.join(filter(None, tokens))

    # A single remaining word is returned untouched: no character filtering
    # and no contraction expansion is applied to it.
    if len(combined_text.split()) == 1:
        return combined_text

    combined_text = re.sub(r"[^A-Za-z ]+", '', combined_text)

    # Keys are escaped so they are always matched literally, whatever
    # characters are added to the table later.
    for short, full in contractions.items():
        combined_text = re.sub(rf"\b{re.escape(short)}\b", full, combined_text)
    return combined_text
def query(payload, timeout=30):
    """POST `payload` as JSON to `API_URL` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable value sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests` waits indefinitely, which would block the
            caller for as long as the endpoint stays silent.

    Returns:
        Whatever ``response.json()`` yields; the HTTP status is not checked.

    Raises:
        requests.exceptions.RequestException: on connection problems or when
            the timeout expires.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()
def _mask_profanities(text, words):
    """Return `text` with every occurrence of any of `words` replaced by asterisks.

    Matching is case-insensitive and substring-based. Spaces inside a match
    are kept; every other character becomes '*'.
    """
    candidates = [word for word in words if word]
    if not candidates:
        return text
    # Longest first: in a regex alternation the first alternative that matches
    # wins, so 'putangina' must come before 'puta'. Otherwise only the short
    # prefix is masked and the rest of the word stays readable ('****ngina').
    candidates.sort(key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(word) for word in candidates), re.IGNORECASE)
    return pattern.sub(
        lambda match: "".join(" " if char == " " else "*" for char in match.group()),
        text,
    )


def predict(text):
    """Classify `text` and censor listed profanities when it is labelled 'Abusive'.

    Args:
        text: Raw user input.

    Returns:
        A 2-tuple ``(scores, shown_text)``:
          * ``scores`` is a dict built from the first element of the API
            reply, mapping the first value of each item to its second value
            (presumably label -> score; confirm against the API response).
          * ``shown_text`` is `text` with profanities masked if the first
            entry of ``scores`` is 'Abusive', otherwise `text` unchanged.
        If the request or the reply handling fails, the pair
        ``("Loading", "The model is loading please try again later.")`` is
        returned instead.
    """
    cleaned = preprocess(text)  # computed once, reused for the log line and the request
    print(cleaned)
    try:
        output = query(cleaned)[0]
    except Exception:
        # Deliberate best-effort: any failure here (network error, or a reply
        # that cannot be indexed with [0]) is reported to the user as
        # "still loading" rather than crashing the UI handler.
        return "Loading", "The model is loading please try again later."

    # Each item is expected to carry exactly two values, in (key, value) order.
    output = dict(tuple(item.values()) for item in output)
    predicted_label = list(output)[0]

    if predicted_label != 'Abusive':
        return output, text
    return output, _mask_profanities(text, profanities)
# SECURITY: an access token should not live in source control. The token
# handed to the dataset saver is read from the HF_DATASET_TOKEN environment
# variable when it is set; the literal is kept only as a backward-compatible
# fallback and should be rotated and removed once the variable is configured.
_HF_DATASET_TOKEN = os.environ.get("HF_DATASET_TOKEN") or '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'

# Flagging callback: receives the samples users flag in the UI.
hf_writer = gr.HuggingFaceDatasetSaver(_HF_DATASET_TOKEN, 'tagalog-profanity-feedbacks')

# One text box in; the label scores and the (possibly censored) text out.
# NOTE(review): the non-ASCII characters in some examples look like
# mis-decoded emoji; they are kept exactly as found, so confirm them against
# the original file.
demo = gr.Interface(
    fn=predict,
    inputs=[gr.components.Textbox(lines=5, placeholder='Enter your input here', label='INPUT')],
    outputs=[gr.components.Label(num_top_classes=2, label="PREDICTION"),
             gr.components.Text(label='OUTPUT')],
    examples=['Tangina mo naman sobrang yabang mo gago!!๐ ๐ค @davidrafael',
              'Napakainit ngayong araw pakshet namaaan!!',
              'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
              'Bobo ka ba? napakadali lang nyan eh... ๐คก',
              'Uy gago laptrip yung nangyare samen kanina HAHAHA๐๐'],
    allow_flagging="manual",
    flagging_callback=hf_writer,
    flagging_options=['Good bot', 'Bad bot'],
)

demo.launch()