File size: 2,760 Bytes
eece183 fe491e4 07847cc fe491e4 eece183 fe491e4 eece183 07847cc eece183 95491b9 eece183 95491b9 cb9ee2f eece183 cb9ee2f ad55efb eece183 0bfefe0 eece183 5d8204d ad55efb eece183 0bfefe0 eece183 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import torch
from torch import nn
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm
import gradio as gr
import string
# Checkpoint identifier handed to `from_pretrained`; the same name is reused
# further down the file to build the sequence classifier.
model_name = 'neuralmind/bert-base-portuguese-cased'
# Tokenizer matching that checkpoint. It is created once at import time and
# shared by every request handled by `generate_predictions`.
tokenizer = BertTokenizer.from_pretrained(model_name)
def predict(model, loader, device=None):
    """Return the predicted class index for every example in *loader*.

    Args:
        model: A module that is called as
            ``model(input_ids, attention_mask=...)`` and returns an object
            exposing a ``logits`` tensor with one row per example.
        loader: Iterable of ``(input_ids, attention_mask)`` tensor batches.
        device: Device the batches are moved to before the forward pass.
            Defaults to the device of the model's first parameter (CPU when
            the model has no parameters), so the inputs always follow the
            model instead of relying on a module-level global.

    Returns:
        A flat list of ``int`` class indices (arg-max over the logits), in
        loader order. Empty when *loader* yields nothing.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    # Switch off dropout & co.; the model is left in eval mode afterwards.
    model.eval()
    predictions = []
    # No gradients are needed for inference, which saves memory and time.
    with torch.no_grad():
        for input_ids, attention_mask in loader:
            outputs = model(
                input_ids.to(device),
                attention_mask=attention_mask.to(device),
            )
            # Highest-scoring class per row, moved back to plain Python ints.
            predictions.extend(outputs.logits.argmax(dim=1).cpu().tolist())
    return predictions
def preprocess_text(text):
    """Normalize *text* for classification.

    Every character listed in ``string.punctuation`` (ASCII punctuation only)
    is dropped, then the remainder is lowercased. Whitespace and accented
    letters are kept.
    """
    # Mapping each punctuation code point to None makes `translate` delete it.
    drop_punctuation = dict.fromkeys(map(ord, string.punctuation))
    return text.translate(drop_punctuation).lower()
def generate_predictions(text):
    """Classify each sentence of *text* and return the results as HTML.

    The input is split on ".", every piece is normalized with
    ``preprocess_text`` and classified on its own using the module-level
    ``tokenizer`` and ``loaded_model``.

    Args:
        text: Raw user input; may contain several "."-separated sentences.

    Returns:
        One ``"<sentence>: <predicted class index>"`` entry per non-empty
        sentence, joined with ``<br>``. An empty string when there is nothing
        to classify.
    """
    if not text:
        return ""

    # A trailing "." (or "...") leaves empty fragments behind; classifying
    # those would add meaningless ": <class>" rows to the output, so only
    # fragments that still contain text after normalization are kept.
    sentences = [
        cleaned
        for cleaned in (preprocess_text(piece).strip() for piece in text.split("."))
        if cleaned
    ]

    predictions = []
    for sentence in sentences:
        input_encodings = tokenizer(
            sentence, truncation=True, padding=True, max_length=512, return_tensors='pt'
        )
        input_dataset = torch.utils.data.TensorDataset(
            input_encodings['input_ids'], input_encodings['attention_mask']
        )
        # pin_memory is left at its default: pinned host memory only speeds up
        # host-to-accelerator copies and buys nothing for CPU inference.
        input_loader = torch.utils.data.DataLoader(
            input_dataset, batch_size=1, shuffle=False, num_workers=0
        )
        # Each loader holds exactly one example, hence the [0].
        sentence_prediction = predict(loaded_model, input_loader)[0]
        # preprocess_text has already removed ASCII punctuation (including
        # <, > and &), so the sentence can be embedded in the HTML as-is.
        predictions.append(f"{sentence}: {sentence_prediction}")

    return "<br>".join(predictions)
# Inference runs on CPU.
device = torch.device('cpu')
# Build the classifier from the same checkpoint name as the tokenizer, with a
# two-class head (num_labels=2); its weights are then replaced by the saved
# state dict below.
loaded_model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
# map_location=device remaps the stored tensors onto the CPU while loading.
# NOTE(review): torch.load unpickles the file, so 'best_model8.pt' must come
# from a trusted source. The relative path is resolved against the current
# working directory.
loaded_model.load_state_dict(torch.load('best_model8.pt', map_location=device))
loaded_model.to(device)
# Define the Gradio interface.
# `generate_predictions` returns one string of "<sentence>: <class>" entries
# joined with "<br>", so the output component must render HTML; a Label
# component would show the markup literally and expects class confidences.
iface = gr.Interface(
    fn=generate_predictions,
    inputs=gr.inputs.Textbox(lines=5, label="Input Text"),
    outputs=gr.outputs.HTML(label="Prediction"),
    examples=[
        ["Seu Comunista!"],
        ['Os imigrantes não deveriam ser impedidos de entrar no meu país'],
        ['Os imigrantes deveriam ser impedidos de entrar no meu país'],
        ['eu te amo'],
        ['aquele cara é um babaca'],
    ],
)
# Launch the interface (starts the web server; runs at import time as before).
iface.launch()
|