ivanpalenciab's picture
Upload 4 files
97f77d9 verified
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from transformers import BertModel, BertTokenizer
# Module-level configuration for the sentiment classifier.
# RANDOM_SEED and BATCH_SIZE are not referenced by the code visible in this
# file; presumably they mirror the training setup -- TODO confirm.
RANDOM_SEED = 30
# Maximum number of tokens per review; longer inputs are truncated and
# shorter ones padded to this length when tokenizing.
MAX_LEN = 200
BATCH_SIZE = 16
# Number of output classes of the classification head.
NCLASSES = 3
# Device selection: use the first CUDA GPU when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Tokenization: load the tokenizer matching the pre-trained Spanish BERT
# checkpoint (this may download files on first use).
PRE_TRAINED_MODEL_NAME = 'dccuchile/bert-base-spanish-wwm-cased'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
#Model Class
class BERTSentimentClassifier(nn.Module):
    """BERT encoder followed by dropout and a linear classification layer.

    The encoder weights are initialised from ``PRE_TRAINED_MODEL_NAME``.
    The attribute names (``bert``, ``drop``, ``linear``) are part of the
    saved ``state_dict`` keys and must not be renamed.
    """

    def __init__(self, n_classes):
        """Build the network.

        Args:
            n_classes: Number of output classes (size of the logits vector).
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.drop = nn.Dropout(p=0.3)
        self.linear = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        """Return the unnormalised class scores (logits) for a batch.

        Args:
            input_ids: Token id tensor produced by the tokenizer.
            attention_mask: Mask tensor produced by the tokenizer.

        Returns:
            The output of the linear layer applied to the dropped-out
            ``pooler_output`` of the encoder.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.linear(self.drop(encoded.pooler_output))
# Build the classifier and restore the fine-tuned weights at import time.
model = BERTSentimentClassifier(NCLASSES)
# The checkpoint is first mapped onto the CPU so it loads even on machines
# without a GPU; the model is moved to the selected device right after.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load("Models/BERTo_model_parameters.pth", map_location=torch.device('cpu')))
model.to(device)
# Switch to evaluation mode so dropout is disabled during inference.
model.eval()
# Maps the class index predicted by the model to its sentiment label.
_SENTIMENT_LABELS = {0: "Positivo", 1: "Neutral", 2: "Negativo"}


def _predict_labels(texts):
    """Return the sentiment label of every string in *texts* with one forward pass.

    Args:
        texts: Non-empty list of review strings.

    Returns:
        A list of labels ("Positivo", "Neutral" or "Negativo"), one per
        input text and in the same order.
    """
    # Calling the tokenizer directly replaces the deprecated ``encode_plus``
    # and accepts a whole batch of strings at once.
    encoded = tokenizer(
        texts,
        max_length=MAX_LEN,
        truncation=True,
        add_special_tokens=True,
        return_token_type_ids=False,
        padding="max_length",
        return_attention_mask=True,
        return_tensors="pt",
    )
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(input_ids, attention_mask)
    # The class with the highest score is the prediction for each row.
    class_ids = torch.argmax(logits, dim=1).tolist()
    return [_SENTIMENT_LABELS[class_id] for class_id in class_ids]


def classifySentiment(review_text):
    """Classify the sentiment of a single review.

    Args:
        review_text: The review to classify, as a string.

    Returns:
        A ``(review_text, label)`` tuple where ``label`` is "Positivo",
        "Neutral" or "Negativo".
    """
    return review_text, _predict_labels([review_text])[0]


def groupClassifier(df, batch_size=32):
    """Classify the sentiment of every review in a pandas DataFrame.

    The texts are sent to the model in chunks of ``batch_size`` so that each
    chunk needs a single forward pass instead of one pass per text.

    Args:
        df: DataFrame with a ``"text"`` column holding the reviews.
        batch_size: Number of texts classified per forward pass.

    Returns:
        A dict with two parallel lists: ``"text"`` (the original reviews, in
        input order) and ``"label"`` (the predicted sentiment of each one).

    Raises:
        KeyError: If ``df`` has no ``"text"`` column.
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    texts = df["text"].tolist()
    clasification = {
        "text": [],
        "label": [],
    }
    # An empty DataFrame yields no chunks, so the model is never called.
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        clasification["text"].extend(chunk)
        clasification["label"].extend(_predict_labels(chunk))
    return clasification