DaviLima committed on
Commit
07847cc
·
1 Parent(s): 1d365d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -5,6 +5,7 @@ from sklearn.model_selection import train_test_split
5
  from sklearn.metrics import classification_report
6
  from tqdm import tqdm
7
  import gradio as gr
 
8
 
9
  model_name = 'neuralmind/bert-base-portuguese-cased'
10
  tokenizer = BertTokenizer.from_pretrained(model_name)
@@ -25,7 +26,16 @@ def predict(model, loader):
25
 
26
  return predictions
27
 
 
 
 
 
 
 
 
 
28
  def generate_predictions(text):
 
29
  input_encodings = tokenizer(
30
  text, truncation=True, padding=True, max_length=512, return_tensors='pt'
31
  )
 
5
  from sklearn.metrics import classification_report
6
  from tqdm import tqdm
7
  import gradio as gr
8
+ import string
9
 
10
  model_name = 'neuralmind/bert-base-portuguese-cased'
11
  tokenizer = BertTokenizer.from_pretrained(model_name)
 
26
 
27
  return predictions
28
 
29
+
30
+ def preprocess_text(text):
31
+ # Remove pontuação
32
+ text = text.translate(str.maketrans("", "", string.punctuation))
33
+ # Converter para letras minúsculas
34
+ text = text.lower()
35
+ return text
36
+
37
  def generate_predictions(text):
38
+ text = preprocess_text(text)
39
  input_encodings = tokenizer(
40
  text, truncation=True, padding=True, max_length=512, return_tensors='pt'
41
  )