Spaces:

polygraf-ai
/

copyright_checker

Runtime error

Ali Asgarov committed on Feb 17, 2024

Commit

cf6e402

1 Parent(s): 566b7f7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -188,6 +188,12 @@ text_mc_model_path = "polygraf-ai/text-detect-mc-bert-base-uncased-v1-bert-429k"
 text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
 text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
 def remove_special_characters(text):
     text = remove_accents(text)
     pattern = r'[^\w\s\d.,!?\'"()-;]+'

 text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
 text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
+def remove_accents(input_str):
+    # nfkd_form = unicodedata.normalize('NFKD', input_str)
+    # return "".join([char for char in nfkd_form if not unicodedata.combining(char)])
+    text_no_accents = unidecode(input_str)
+    return text_no_accents
 def remove_special_characters(text):
     text = remove_accents(text)
     pattern = r'[^\w\s\d.,!?\'"()-;]+'