# PyCodeT5 / evaluator.py
# S-Dreamer's picture
# Update evaluator.py
# 44314b6 verified
# evaluator.py
import torch
from torchmetrics.text.bleu import BLEUScore
from torchmetrics.text.rouge import ROUGEScore
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
class CodeEvaluator:
    """Generate code from a natural-language prompt and score it against a reference.

    The tokenizer, the seq2seq model and the two metrics (4-gram BLEU and
    ROUGE) are created once in ``__init__`` and reused by every ``evaluate``
    call. Model and metrics are moved to CUDA when it is available, otherwise
    they stay on the CPU.
    """

    def __init__(self, model_name: str = "S-Dreamer/PyCodeT5") -> None:
        """Load the tokenizer, model and metrics.

        Args:
            model_name: Identifier handed to ``from_pretrained`` for both the
                tokenizer and the seq2seq model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        # Metrics live on the same device as the model.
        self.bleu = BLEUScore(n_gram=4).to(self.device)
        self.rouge = ROUGEScore().to(self.device)

    def evaluate(self, nl_input: str, target_code: str):
        """Generate code for ``nl_input`` and compare it with ``target_code``.

        Args:
            nl_input: Natural-language description given to the model.
            target_code: Reference implementation to score against.

        Returns:
            A ``(bleu_score, rouge_score)`` tuple holding whatever the BLEU
            and ROUGE metric calls return for this single sample.
        """
        self.model.eval()  # inference behaviour (e.g. no dropout)
        with torch.no_grad():  # generation needs no gradients
            inputs = self.tokenizer(nl_input, return_tensors="pt").to(self.device)
            outputs = self.model.generate(
                **inputs,
                max_length=512,
                num_beams=5,
                early_stopping=True,
            )
        # Only the first returned sequence is scored.
        generated_code = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # BLEUScore works on a corpus: a list of hypotheses and, for each
        # hypothesis, a list of references. Passing bare strings would make it
        # iterate over single characters instead of whole snippets.
        bleu_score = self.bleu([generated_code], [[target_code]])
        # ROUGEScore accepts plain strings for a single prediction/reference.
        rouge_score = self.rouge(generated_code, target_code)
        return bleu_score, rouge_score
def main() -> None:
    """Run a one-sample demo: generate a string-reversal function and print its scores."""
    evaluator = CodeEvaluator()
    nl_input = "Write a Python function to reverse a string."
    # Written with explicit escapes so the reference stays valid Python
    # (indented function body) regardless of how this file is laid out.
    target_code = "def reverse_string(s):\n    return s[::-1]\n"
    bleu_score, rouge_score = evaluator.evaluate(nl_input, target_code)
    print(f"BLEU score: {bleu_score}")
    print(f"ROUGE score: {rouge_score}")


if __name__ == "__main__":
    main()