File size: 2,760 Bytes
eece183
 
 
 
 
 
fe491e4
07847cc
fe491e4
eece183
 
fe491e4
eece183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
07847cc
 
 
 
 
 
 
eece183
95491b9
 
 
 
 
 
 
 
 
 
 
 
 
 
eece183
95491b9
 
cb9ee2f
eece183
cb9ee2f
 
ad55efb
eece183
 
 
 
 
 
0bfefe0
eece183
 
 
 
 
 
5d8204d
ad55efb
eece183
 
 
0bfefe0
eece183
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import torch
from torch import nn
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm
import gradio as gr
import string

# Hugging Face model identifier; used for the tokenizer here and for the
# classifier instantiated further down in this file.
model_name = 'neuralmind/bert-base-portuguese-cased'
# Tokenizer loaded from the same checkpoint; generate_predictions() uses it to
# encode each sentence.
tokenizer = BertTokenizer.from_pretrained(model_name)

def predict(model, loader):
    """Run inference over ``loader`` and return the predicted class indices.

    The model is switched to evaluation mode and gradient tracking is disabled
    for the whole pass. Every batch must unpack into
    ``(input_ids, attention_mask)``; both tensors are moved to the module-level
    ``device`` before the forward pass.

    Args:
        model: Callable as ``model(input_ids, attention_mask=...)`` whose
            result exposes a ``logits`` attribute.
        loader: Iterable yielding ``(input_ids, attention_mask)`` batches.

    Returns:
        A flat list holding, for every row of every batch in iteration order,
        the argmax of the logits along dimension 1.
    """
    model.eval()
    predicted_labels = []
    with torch.no_grad():
        for token_ids, mask in loader:
            logits = model(
                token_ids.to(device), attention_mask=mask.to(device)
            ).logits
            # Move to CPU before converting so the result is a plain Python list.
            predicted_labels += logits.argmax(dim=1).cpu().tolist()
    return predicted_labels


def preprocess_text(text):
    """Return ``text`` with ASCII punctuation removed and letters lower-cased.

    Only the characters listed in ``string.punctuation`` are deleted; all other
    characters (including accented letters and whitespace) are kept.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string.
    """
    strip_punctuation = str.maketrans("", "", string.punctuation)
    return text.translate(strip_punctuation).lower()
def generate_predictions(text):
    """Classify each sentence of ``text`` and return the results as HTML.

    The input is split on ``"."``; every piece is normalised with
    ``preprocess_text`` and stripped of surrounding whitespace. Pieces that end
    up empty (trailing period, consecutive periods, blank input) are skipped so
    they are never sent to the model or reported as spurious results.

    Args:
        text: Raw user input containing zero or more sentences.

    Returns:
        One ``"<sentence>: <predicted class index>"`` entry per non-empty
        sentence, joined with ``"<br>"``. An empty string is returned when
        there is nothing to classify.
    """
    if not text:
        return ""

    cleaned = (preprocess_text(piece).strip() for piece in text.split("."))
    sentences = [sentence for sentence in cleaned if sentence]

    predictions = []
    for sentence in sentences:
        input_encodings = tokenizer(
            sentence, truncation=True, padding=True, max_length=512, return_tensors='pt'
        )
        input_dataset = torch.utils.data.TensorDataset(
            input_encodings['input_ids'], input_encodings['attention_mask']
        )
        # pin_memory is left at its default: inference in this file runs on the
        # CPU device, so pinning host memory brings no benefit.
        input_loader = torch.utils.data.DataLoader(
            input_dataset, batch_size=1, shuffle=False, num_workers=0
        )

        # The loader holds exactly one sentence, so the first (and only)
        # prediction is the one for this sentence.
        sentence_prediction = predict(loaded_model, input_loader)[0]
        predictions.append(f"{sentence}: {sentence_prediction}")

    return "<br>".join(predictions)


# Specify the device as CPU; predict() moves every batch to this device.
device = torch.device('cpu')

# Load the saved model and map it to the CPU:
# first instantiate the architecture from the pretrained checkpoint with a
# 2-label classification head, then replace its weights with the state dict
# stored in 'best_model8.pt' (presumably the fine-tuned weights — confirm).
# NOTE(review): torch.load unpickles the file, so the checkpoint must come from
# a trusted source; consider weights_only=True if the installed torch version
# supports it.
loaded_model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
loaded_model.load_state_dict(torch.load('best_model8.pt', map_location=device))
loaded_model.to(device)

# Define the Gradio interface: a multi-line text box feeds generate_predictions
# and its return value is shown in the "Prediction" output component.
# NOTE(review): generate_predictions returns a "<br>"-joined string, while the
# output component is a Label configured with num_top_classes — confirm the
# Label renders this as intended (an HTML output may be a better fit).
# NOTE(review): the gr.inputs / gr.outputs namespaces are deprecated in newer
# Gradio releases — confirm the pinned Gradio version still provides them.
iface = gr.Interface(
    fn=generate_predictions,
    inputs=gr.inputs.Textbox(lines=5, label="Input Text"),
    outputs=gr.outputs.Label(num_top_classes=2, label="Prediction"),
     # Sample inputs (Portuguese) passed to the interface as examples.
     examples=[
        ["Seu Comunista!"],
        ['Os imigrantes não deveriam ser impedidos de entrar no meu país'],
        ['Os imigrantes deveriam ser impedidos de entrar no meu país'],
        ['eu te amo'],
        ['aquele cara é um babaca'],
    ]
)

# Launch the interface (runs at module import time; there is no __main__ guard)
iface.launch()