Spaces:

gabeorlanski
/

bc_eval

Runtime error

File size: 3,836 Bytes
import datetime
import gc
import multiprocessing as mp
import pathlib
import subprocess
from dataclasses import dataclass
from typing import Dict, List

from tqdm import tqdm

@dataclass
class CommandResult:
    return_code: int
    runtime: float
    stdout: str
    stderr: str
    timed_out: bool 

def safe_execute(
    command_to_run: List[str],
    working_dir: pathlib.Path,
    timeout: int = 10,
) -> CommandResult:
    """Executes a list of commands safely.

    Args:
      command_to_run: The command to run.
      working_dir: The working directory to run them in.
      timeout Timeout.

    Returns:
      The result of executing the command.
    """
    timed_out = False
    return_code = -1
    runtime = timeout
    stderr = None
    stdout = None
    start_time = datetime.datetime.now()
    execution_process = subprocess.Popen(
        command_to_run,
        cwd=str(working_dir),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    try:
        outputs = execution_process.communicate(timeout=timeout)
    
        stdout, stderr = outputs
        stdout = stdout.decode('utf-8')
        stderr = stderr.decode('utf-8')
        runtime = (datetime.datetime.now() - start_time).total_seconds()
        return_code = execution_process.returncode
    except subprocess.TimeoutExpired:
        timed_out = True
        runtime = timeout
    finally:
        execution_process.kill()
    
    return CommandResult(
        return_code=return_code,
        runtime=runtime,
        stderr=stderr,
        stdout=stdout,
        timed_out=timed_out,
    )


def execute_code(sample: Dict):
    """Execute a file of code.

    Args:
        sample: The sample to run.

    Returns:
        The execution result.
    """
    file_path = sample["cwd"]
    working_dir_for_execution = (
        file_path.parent if file_path.is_file() else file_path
    )
    working_dir_for_execution = working_dir_for_execution.resolve().absolute()
    timed_out = False
    failed = False
    results = []
    for command in sample['commands']:
        res = safe_execute(command['command'], working_dir=working_dir_for_execution, timeout=command['timeout'])
        results.append(res)
        if res.timed_out:
            timed_out = True 
            break
        if res.return_code != 0:
            failed = True
            break    
    return {
        "qid":sample['qid'],
        "idx": sample["idx"],
        "file_path": str(file_path.absolute().resolve()),
        "results": results,
        "failed":failed,
        "timed_out": timed_out,
    }




def execute_predictions(
    predictions: List[Dict],
    num_workers: int = 1,
    max_task_per_child: int = 1,
    garbage_collection_freq: int = 500,
):
    """Execute a list of predictions in a specific language.

    Args:
        predictions: List of predictions.
        num_workers: The number of workers to use.
        max_task_per_child: The maximum tasks ran per child before it is killed.
        garbage_collection_freq: How often to run garbage collection.

    Returns:
        The the array of raw execution results and the total runtime.
    """

    # Make the arguments to submit to the ThreadPoolExecutor. Do it here so we
    # can have a progress bar as well.
    num_to_complete = len(predictions)
    num_completed = 0
    results = []
    with mp.Pool(num_workers, maxtasksperchild=max_task_per_child) as pool:
        for result in tqdm(
            pool.imap_unordered(execute_code, predictions),
            total=num_to_complete,
            desc="Executing",
        ):
            num_completed += 1

            results.append(result)

            if num_completed % garbage_collection_freq == 0:
                gc.collect()
        # Cleanup pool
        pool.close()
        pool.terminate()
    return results