# Provenance: mcding — published version, commit ad552d8
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
# Load GPT-2 large model and tokenizer
def load_perplexity_model_and_tokenizer():
    """Load GPT-2 large and its fast tokenizer for perplexity scoring.

    Returns:
        tuple: (model, tokenizer); the model is moved to GPU when one
        is available, otherwise it stays on CPU.
    """
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2-large")
    model = GPT2LMHeadModel.from_pretrained("gpt2-large").to(target_device)
    return model, tokenizer
# Compute perplexity for a single prompt
def compute_prompt_perplexity(prompt, models, stride=512):
assert isinstance(prompt, str)
assert isinstance(models, tuple) and len(models) == 2
ppl_model, ppl_tokenizer = models
encodings = ppl_tokenizer(prompt, return_tensors="pt")
max_length = ppl_model.config.n_positions
seq_len = encodings.input_ids.size(1)
nlls = []
prev_end_loc = 0
for begin_loc in range(0, seq_len, stride):
end_loc = min(begin_loc + max_length, seq_len)
trg_len = end_loc - prev_end_loc # may be different from stride on last loop
input_ids = encodings.input_ids[:, begin_loc:end_loc].to(
next(ppl_model.parameters()).device
)
target_ids = input_ids.clone()
target_ids[:, :-trg_len] = -100
with torch.no_grad():
outputs = ppl_model(input_ids, labels=target_ids)
neg_log_likelihood = outputs.loss
nlls.append(neg_log_likelihood)
prev_end_loc = end_loc
if end_loc == seq_len:
break
ppl = torch.exp(torch.stack(nlls).mean()).item()
return ppl