Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ from sklearn.model_selection import train_test_split
|
|
5 |
from sklearn.metrics import classification_report
|
6 |
from tqdm import tqdm
|
7 |
import gradio as gr
|
|
|
8 |
|
9 |
model_name = 'neuralmind/bert-base-portuguese-cased'
|
10 |
tokenizer = BertTokenizer.from_pretrained(model_name)
|
@@ -25,7 +26,16 @@ def predict(model, loader):
|
|
25 |
|
26 |
return predictions
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def generate_predictions(text):
|
|
|
29 |
input_encodings = tokenizer(
|
30 |
text, truncation=True, padding=True, max_length=512, return_tensors='pt'
|
31 |
)
|
|
|
5 |
from sklearn.metrics import classification_report
|
6 |
from tqdm import tqdm
|
7 |
import gradio as gr
|
8 |
+
import string
|
9 |
|
10 |
model_name = 'neuralmind/bert-base-portuguese-cased'
|
11 |
tokenizer = BertTokenizer.from_pretrained(model_name)
|
|
|
26 |
|
27 |
return predictions
|
28 |
|
29 |
+
|
30 |
+
# Translation table that deletes every character in ``string.punctuation``
# (ASCII punctuation only). Built once at import time instead of per call.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def preprocess_text(text):
    """Normalize raw input text before it is tokenized.

    Strips ASCII punctuation and lower-cases the result. Non-ASCII
    punctuation (e.g. « » “ ” …) is left untouched, as are accented letters.

    Args:
        text: The raw input string. ``None`` is treated as empty input.

    Returns:
        The cleaned, lower-cased string ("" when ``text`` is ``None``).
    """
    if text is None:
        # Guard against a missing value so the caller gets an empty string
        # rather than an AttributeError on ``.translate``.
        return ""

    # Remove punctuation.
    without_punctuation = text.translate(_PUNCTUATION_TABLE)

    # Convert to lowercase.
    # NOTE(review): the checkpoint name used in this file ends in "-cased";
    # lower-casing the input discards casing — confirm this matches how the
    # model was trained.
    return without_punctuation.lower()
|
36 |
+
|
37 |
def generate_predictions(text):
|
38 |
+
text = preprocess_text(text)
|
39 |
input_encodings = tokenizer(
|
40 |
text, truncation=True, padding=True, max_length=512, return_tensors='pt'
|
41 |
)
|