File size: 4,595 Bytes
62f8b70
 
 
 
 
 
 
 
 
 
 
 
5ea436e
 
62f8b70
5ea436e
62f8b70
5ea436e
 
 
 
62f8b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ea436e
 
 
 
 
62f8b70
97a885e
 
5ea436e
97a885e
5ea436e
97a885e
 
 
 
 
 
 
 
 
 
 
 
 
 
62f8b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97a885e
62f8b70
 
 
 
 
 
 
 
 
 
 
 
 
 
97a885e
62f8b70
 
97a885e
62f8b70
97a885e
 
62f8b70
 
 
 
 
5ea436e
62f8b70
5ea436e
 
62f8b70
 
5ea436e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import json
import subprocess
import pandas as pd

from generate import get_solution_file_path, all_models
import time

import os
import subprocess


def evaluate_submission(day: int, model: str, timeout = 60 * 5):
    """Run one submission's Python solution and capture its output and runtime.

    The solution is executed with ``python <file>`` from inside the
    ``dayNN`` directory. This function does not score the submission
    (e.g. award a star); it only collects what the program printed.

    Args:
        day: Day number; selects the ``day{day:02d}`` directory.
        model: Model name, passed to ``get_solution_file_path``.
        timeout: Seconds to wait before the run is aborted, guarding
            against infinite loops.

    Returns:
        ``None`` when the solution file does not exist. Otherwise a dict with
        ``"result"`` (stdout on a zero exit code, else ``"Error: <stderr>"``;
        a timeout is reported as ``"Error: Timeout"``) and ``"total_time"``
        (wall-clock seconds spent running the solution).

    Raises:
        FileNotFoundError: If the ``dayNN`` directory does not exist.
    """
    original_cwd = os.getcwd()

    # cd to the day directory; everything below runs relative to it
    os.chdir(f"day{day:02d}")
    try:
        # NOTE(review): called without `day` here, presumably because the path
        # is resolved relative to the day directory -- confirm against generate.py
        file_path = get_solution_file_path(model=model)
        if not os.path.exists(file_path):
            print(f"File {file_path} does not exist, skipping")
            return None
        print(f"Evaluating {file_path} for day {day} with model {model}")

        # run the solution, and capture the output
        command = ["python", file_path]
        start_time = time.time()
        try:
            completed = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
            print(f"Result: {completed.stdout}")
        except subprocess.TimeoutExpired:
            # Represent the timeout as a failed run so it is reported like any other error
            completed = subprocess.CompletedProcess(args=command, returncode=1, stdout="", stderr="Timeout")
            print(f"Timeout after {timeout} seconds")
        total_time = time.time() - start_time
    finally:
        # Always restore the working directory, including on the early return
        # above and on unexpected exceptions, so later calls are not affected.
        os.chdir(original_cwd)

    output = completed.stdout if completed.returncode == 0 else f"Error: {completed.stderr}"

    return {
        "result": output,
        "total_time": total_time,
    }


def get_solution_code(day: int, model: str) -> str:
    """Read the solution file for the given day and model and return its text."""
    solution_path = get_solution_file_path(day=day, model=model)
    with open(solution_path, "r") as handle:
        source_code = handle.read()
    return source_code


def extract_solutions(df, model: str, output_file = "solutions.json") -> dict:
    """Collect one model's outputs for days 1-25 to use as 'ground truth'.

    For each day, the first matching row's ``result`` is split into lines:
    the first line is part 1 and the second is part 2. Anything missing is
    recorded as ``"N/A"``; this covers a day with no row, empty output, a
    missing second line, and output with more than two lines.

    Args:
        df: DataFrame with at least ``day``, ``model`` and ``result`` columns.
        model: Name of the model whose results are treated as the reference.
        output_file: Path of the JSON file the solutions are written to.

    Returns:
        Dict mapping each day (int, 1-25) to ``[part1, part2]``. The same
        mapping is saved to ``output_file`` (JSON turns the keys into strings).
    """
    solutions = {}
    for day in range(1, 26):
        sub_df = df[(df["model"] == model) & (df["day"] == day)]
        results = sub_df["result"].tolist()

        # No row for this day/model: treat as no output instead of raising IndexError
        raw = results[0] if results else ""
        if not isinstance(raw, str):
            # An empty cell read back from CSV arrives as NaN; other scalars are stringified
            raw = "" if pd.isna(raw) else str(raw)

        stripped = raw.strip("\n")
        # "".split("\n") would yield [""], so map empty output to an empty list explicitly
        day_solution = stripped.split("\n") if stripped else []

        part1 = "N/A"
        part2 = "N/A"
        if len(day_solution) == 1:
            part1 = day_solution[0]
        elif len(day_solution) == 2:
            part1, part2 = day_solution
        elif len(day_solution) > 2:
            print(f"Something went wrong, check day {day} solution: \n {day_solution}")

        solutions[day] = [part1, part2]

    with open(output_file, "w") as f:
        json.dump(solutions, f, indent=2)

    return solutions


def evaluate_submissions(all_models, results_file = "results.csv", skip = True):
    """Run every model's solution for days 1-25 and collect the outputs.

    Results are loaded from ``results_file`` if it already exists, extended
    with any new runs, and written back to ``results_file`` after every
    evaluation and at the end of each day.

    Args:
        all_models: Mapping of provider name to an iterable of model names.
        results_file: CSV path used both to resume from and to save to.
        skip: When true, a (day, model) pair already present in the results
            is not evaluated again.

    Returns:
        DataFrame with columns ``day``, ``model``, ``result`` and
        ``total_time``, sorted by day.
    """

    if os.path.exists(results_file):
        df = pd.read_csv(results_file)
    else:
        df = pd.DataFrame(columns=["day", "model", "result", "total_time"])

    for day in range(1, 26):
        print("*" * 80)
        print(f"Evaluating day {day}")
        for provider in all_models:
            for model in all_models[provider]:
                print("-" * 80)

                already_evaluated = ((df["day"] == day) & (df["model"] == model)).any()
                if skip and already_evaluated:
                    print(f"Skipping {provider} {model} for day {day} because it already exists")
                    continue

                print(f"Evaluating day {day} with model {model}")
                result = evaluate_submission(day, model)
                if result is None:
                    # evaluate_submission returns None when the solution file is
                    # missing; there is nothing to record for this pair.
                    print("-" * 80)
                    continue

                new_row = pd.DataFrame({
                    "day": [day],
                    "model": [model],
                    "result": [result["result"]],
                    "total_time": [result["total_time"]],
                })
                df = pd.concat([df, new_row], ignore_index=True)

                # Save incrementally, to the file the caller asked for
                df.to_csv(results_file, index=False)
                print("-" * 80)

        print("*" * 80)
        df = df.sort_values(by="day")
        df.to_csv(results_file, index=False)
    return df



if __name__ == "__main__":
    # Register the human-written submissions next to the model providers
    all_models["human"] = ["jerpint"]

    # Run every solution and gather the printed outputs
    results_df = evaluate_submissions(all_models, results_file="results.csv")

    # Use the human submissions as the reference answers
    solutions = extract_solutions(results_df, model="jerpint", output_file="solutions.json")