import warnings

import torch


class PerplexityEvaluator(object):
    def __init__(self, model, tokenizer, ignore_index=-100):
        # `ignore_index` marks label positions excluded from the loss;
        # PyTorch/HuggingFace cross-entropy ignores -100 by default.
        self.model = model
        self.tokenizer = tokenizer
        self.ignore_index = ignore_index

    def __call__(self, text, context=None):
        return self.log_perplexity(text, context)

    def log_perplexity(self, text, context=None):
        """
        Evaluate the log-perplexity of `text` under the language model,
        optionally conditioned on `context`.

        :param text: string whose log-perplexity is evaluated
        :param context: optional string prepended to `text`; its tokens are
            masked out of the loss via `ignore_index`
        :return: mean cross-entropy (log-perplexity) over the tokens of `text`
        """
        device = self.model.device
        text_ids = self.tokenizer(text, return_tensors='pt')
        if context:
            context_ids = self.tokenizer(context, return_tensors='pt')
            # Condition on the context but score only the text tokens:
            # context positions are labeled with `ignore_index` so they do
            # not contribute to the loss.
            input_ids = torch.cat([context_ids['input_ids'], text_ids['input_ids']], dim=1)
            labels = torch.cat([torch.ones_like(context_ids['input_ids']) * self.ignore_index,
                                text_ids['input_ids']], dim=1)
            warnings.warn("Remember to remove the context length when reporting log-perplexity.")
        else:
            input_ids = text_ids['input_ids']
            labels = input_ids
        # Evaluation only: no gradient tracking needed.
        with torch.no_grad():
            loss = self.model(input_ids=input_ids.to(device), labels=labels.to(device)).loss
        return loss.detach().cpu().numpy()
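

# A minimal usage sketch, not part of the original file: it assumes the class
# is paired with a HuggingFace causal language model; "gpt2" is used purely as
# an illustrative checkpoint.
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    model.eval()

    evaluator = PerplexityEvaluator(model, tokenizer)

    # Unconditional log-perplexity of a sentence.
    lppx = evaluator("The quick brown fox jumps over the lazy dog.")

    # Log-perplexity of the same sentence conditioned on a context prompt;
    # only the text tokens contribute to the reported loss.
    lppx_ctx = evaluator("The quick brown fox jumps over the lazy dog.",
                         context="Write a sentence about animals:")

    print(lppx, lppx_ctx)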