Spaces:
Running
Running
aliasgerovs
committed on
Commit
•
cf6e402
1
Parent(s):
566b7f7
Update app.py
Browse files
app.py
CHANGED
@@ -188,6 +188,12 @@ text_mc_model_path = "polygraf-ai/text-detect-mc-bert-base-uncased-v1-bert-429k"
|
|
188 |
# Load the tokenizer for the checkpoint identified by text_mc_model_path.
text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
|
189 |
# Load the sequence-classification model from text_mc_model_path and move it to `device`.
text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
|
190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
def remove_special_characters(text):
|
192 |
text = remove_accents(text)
|
193 |
pattern = r'[^\w\s\d.,!?\'"()-;]+'
|
|
|
188 |
# Load the tokenizer for the checkpoint identified by text_mc_model_path.
text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
|
189 |
# Load the sequence-classification model from text_mc_model_path and move it to `device`.
text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
|
190 |
|
191 |
+
def remove_accents(input_str):
    """Return ``input_str`` after passing it through ``unidecode``.

    The value produced by ``unidecode(input_str)`` is returned as-is.
    A previous variant (``unicodedata.normalize('NFKD', ...)`` followed by
    dropping combining characters) was disabled in favor of this call.

    NOTE(review): ``unidecode`` is presumably the transliteration function
    from the third-party Unidecode package -- confirm against the imports.
    """
    return unidecode(input_str)
|
196 |
+
|
197 |
def remove_special_characters(text):
|
198 |
text = remove_accents(text)
|
199 |
pattern = r'[^\w\s\d.,!?\'"()-;]+'
|