|
import logging
|
|
|
|
from utils import check_correctness
|
|
|
|
from evaluation.benchmarks.mint.tasks.base import Task
|
|
|
|
# Module-level logger registered under the name 'MINT'; the task classes in
# this module use it for debug output of the code and tests being checked.
LOGGER = logging.getLogger('MINT')
|
|
|
|
|
|
class CodeGenTask(Task):
    """Generic code generation task instance.

    Stores a task id, a prompt and reference test code, and judges a solution
    by handing the extracted code together with the reference test code to
    ``check_correctness``.

    ``extract_answer`` is not defined in this class; the subclasses in this
    module (``MBPPTask``, ``HumanEvalTask``) provide it.
    """

    def __init__(self, id: str, prompt: str, reference: str, **kwargs):
        """Store the task fields and forward the remaining kwargs to ``Task``.

        Args:
            id: Identifier of this task instance, stored as ``_id``. The
                parameter name shadows the builtin but is kept so existing
                keyword callers keep working.
            prompt: Raw prompt text, stored as ``_prompt``.
            reference: Test code, stored as ``_reference`` and later passed to
                ``check_correctness`` as ``test_code``.
            **kwargs: Passed unchanged to the base-class initializer.
        """
        super().__init__(**kwargs)
        self._id = id
        self._prompt = prompt
        self._reference = reference

    def success(self, solution: str) -> bool:
        """Check whether the given solution can complete the current task.

        Can be used to provide binary feedback.

        Args:
            solution: Raw solution text; the code to check is obtained from it
                via ``self.extract_answer``.

        Returns:
            The ``'success'`` entry of the ``check_correctness`` result, or
            ``False`` when ``extract_answer`` yields ``None``.
        """
        code_to_exec = self.extract_answer(solution)
        # Deferred %-style arguments: the message text is identical to the
        # f-string form but is only built when DEBUG logging is enabled.
        LOGGER.debug('CODE_TO_EXEC:\n%s', code_to_exec)
        LOGGER.debug('TEST_CODE:\n%s', self._reference)

        # extract_answer is annotated ``str | None`` in the subclasses; with no
        # extracted code there is nothing to check, so report failure instead
        # of passing None on as solution_code.
        if code_to_exec is None:
            return False

        res = check_correctness(
            solution_code=code_to_exec, test_code=self._reference, timeout=10
        )
        return res['success']
|
|
|
|
|
|
class MBPPTask(CodeGenTask):
    """Code generation task registered under the name ``'mbpp'``."""

    task_name = 'mbpp'

    @property
    def prompt(self) -> str:
        """Return the prompt for this task.

        The stored prompt has every triple double-quote marker removed and
        is then stripped of leading and trailing whitespace.
        """
        without_quotes = self._prompt.replace('"""', '')
        cleaned = without_quotes.strip()
        return cleaned

    def extract_answer(self, solution: str) -> str | None:
        """Return the given solution unchanged as the answer.

        NOTE: the bigcode-evaluation-harness code this was modified from
        (link below) is described as splitting off the first block of code by
        scanning for class, def etc. on newlines; no such trimming is applied
        here, the text is passed through as-is.

        Modified from:
        https://github.com/bigcode-project/bigcode-evaluation-harness/blob/d61afde130005ecc65cf800ad8eca790a9bc2115/lm_eval/tasks/mbpp.py#L67
        """
        answer = solution
        return answer
|
|
|
|
|
|
class HumanEvalTask(CodeGenTask):
    """Code generation task registered under the name ``'humaneval'``."""

    task_name = 'humaneval'

    @property
    def prompt(self) -> str:
        """Return the prompt for this task.

        The stored prompt is returned with a fixed instruction line
        ("Complete the following code:") and a blank line placed in front
        of it; the stored prompt text itself is not modified.
        """
        instruction = 'Complete the following code:\n\n'
        return instruction + self._prompt

    def extract_answer(self, solution: str) -> str | None:
        """Return the given solution unchanged as the answer.

        NOTE: the bigcode-evaluation-harness code this was modified from
        (link below) is described as splitting off the first block of code by
        scanning for class, def etc. on newlines; no such trimming is applied
        here, the text is passed through as-is.

        Modified from:
        https://github.com/bigcode-project/bigcode-evaluation-harness/blob/d61afde130005ecc65cf800ad8eca790a9bc2115/lm_eval/tasks/humaneval.py#L56
        """
        answer = solution
        return answer
|
|
|