Spaces:

ivanpalenciab
/

sentiment_analysis_app

Sleeping

App Files Files Community

sentiment_analysis_app / model.py

ivanpalenciab

Upload 4 files

97f77d9 verified 4 months ago

raw

history blame contribute delete

2.85 kB

	import torch
	from torch.utils.data import Dataset, DataLoader
	from torch import nn
	from transformers import BertModel, BertTokenizer


	# Module-level configuration for the sentiment classifier.
	# RANDOM_SEED and BATCH_SIZE are not referenced by the code visible in this
	# file; they are kept as-is in case other modules import them.
	RANDOM_SEED = 30
	# Token length passed to the tokenizer as ``max_length`` when encoding a review.
	MAX_LEN = 200
	BATCH_SIZE = 16
	# Number of output classes handed to BERTSentimentClassifier.
	NCLASSES = 3

	# Device selection: use the first CUDA GPU when one is available, else the CPU.
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

	# Tokenization: the tokenizer is loaded from the same pre-trained checkpoint
	# name that the model class uses for its BERT encoder.
	PRE_TRAINED_MODEL_NAME = 'dccuchile/bert-base-spanish-wwm-cased'
	tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

	#Model Class
	class BERTSentimentClassifier(nn.Module):
	    """BERT encoder followed by dropout and a linear classification head.

	    The encoder is loaded from ``PRE_TRAINED_MODEL_NAME``. The head maps the
	    encoder's pooled output to ``n_classes`` raw scores (no softmax applied).
	    """

	    def __init__(self, n_classes):
	        """Create the encoder, the dropout layer and the linear head.

	        Args:
	            n_classes: Number of scores the linear head produces per input.
	        """
	        super().__init__()
	        # Attribute names and their order define the state-dict keys, so they
	        # must stay exactly as they are for saved weights to load.
	        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
	        self.drop = nn.Dropout(p=0.3)
	        self.linear = nn.Linear(self.bert.config.hidden_size, n_classes)

	    def forward(self, input_ids, attention_mask):
	        """Return the raw class scores for a batch of encoded texts.

	        Args:
	            input_ids: Token ids forwarded to the BERT encoder.
	            attention_mask: Attention mask forwarded to the BERT encoder.

	        Returns:
	            The output of the linear head applied to the (dropout-regularised)
	            pooled encoder output.
	        """
	        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
	        pooled = encoded.pooler_output
	        return self.linear(self.drop(pooled))


	# Build the classifier, restore the trained weights (deserialised onto the
	# CPU first), then move it to the selected device and switch to inference mode.
	model = BERTSentimentClassifier(NCLASSES)
	model.load_state_dict(
	    torch.load(
	        "Models/BERTo_model_parameters.pth",
	        map_location=torch.device('cpu'),
	    )
	)
	model.to(device).eval()

	def classifySentiment(review_text):
	    """Classify a single review and return it with its sentiment label.

	    The text is tokenized with truncation and padding to ``MAX_LEN`` tokens,
	    run through the module-level ``model`` with gradient tracking disabled,
	    and the index of the highest score is mapped to a Spanish label.

	    Args:
	        review_text: The text to classify.

	    Returns:
	        A ``(review_text, label)`` tuple, where ``label`` is ``"Positivo"``,
	        ``"Neutral"`` or ``"Negativo"``.
	    """
	    # Call the tokenizer directly: ``encode_plus`` is deprecated in favour of
	    # ``__call__``, which accepts the same options for a single text.
	    encoding_review = tokenizer(
	        review_text,
	        max_length=MAX_LEN,
	        truncation=True,
	        add_special_tokens=True,
	        return_token_type_ids=False,
	        padding="max_length",
	        return_attention_mask=True,
	        return_tensors='pt',
	    )

	    input_ids = encoding_review['input_ids'].to(device)
	    attention_mask = encoding_review['attention_mask'].to(device)

	    # Inference only, so no gradients are needed.
	    with torch.no_grad():
	        output = model(input_ids, attention_mask)

	    # Pick the class with the highest score along the class dimension.
	    prediction = torch.argmax(output, dim=1)

	    # Map the predicted class index to one of the three categories.
	    sentiment_labels = {0: "Positivo", 1: "Neutral", 2: "Negativo"}

	    return review_text, sentiment_labels[prediction.item()]

	def groupClassifier(df):
	    """Classify every entry in the ``"text"`` column of a pandas DataFrame.

	    Each text is passed to ``classifySentiment`` on its own, in row order.

	    Args:
	        df: A pandas DataFrame with a ``"text"`` column holding the texts
	            to classify.

	    Returns:
	        A dict with two parallel lists: ``"text"`` (the texts as returned by
	        ``classifySentiment``) and ``"label"`` (their sentiment labels).
	        Both lists are empty when the column has no rows.
	    """
	    clasification = {
	        "text": [],
	        "label": [],
	    }

	    # Iterate the column directly. Inference happens one text at a time, so
	    # grouping the raw strings with a DataLoader would not reduce the number
	    # of model calls; it would only add a collate step.
	    for text in df["text"].tolist():
	        original_text, label = classifySentiment(text)
	        clasification["text"].append(original_text)
	        clasification["label"].append(label)

	    return clasification