aliasgerovs committed on
Commit
cf6e402
1 Parent(s): 566b7f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -188,6 +188,12 @@ text_mc_model_path = "polygraf-ai/text-detect-mc-bert-base-uncased-v1-bert-429k"
188
  text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
189
  text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
190
 
 
 
 
 
 
 
191
  def remove_special_characters(text):
192
  text = remove_accents(text)
193
  pattern = r'[^\w\s\d.,!?\'"()-;]+'
 
188
  text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
189
  text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
190
 
191
def remove_accents(input_str):
    """Return ``input_str`` after passing it through ``unidecode``.

    NOTE(review): ``unidecode`` is presumably the transliteration function
    from the third-party ``unidecode`` package -- confirm against the
    file's imports, which are not visible here.

    An earlier variant of this helper decomposed the text with
    ``unicodedata.normalize('NFKD', ...)`` and dropped combining
    characters; it was superseded by the ``unidecode`` call below.

    Args:
        input_str: Text to process.

    Returns:
        Whatever ``unidecode(input_str)`` produces.
    """
    return unidecode(input_str)
196
+
197
  def remove_special_characters(text):
198
  text = remove_accents(text)
199
  pattern = r'[^\w\s\d.,!?\'"()-;]+'