File size: 4,396 Bytes
62f8b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import json
import subprocess
import pandas as pd
# from sklearn.manifold import TSNE

from generate import get_solution_file_path, all_models
from openai import OpenAI
import time

import os
import subprocess



# Module-level OpenAI client, constructed as a side effect of importing this file.
# NOTE(review): no code visible in this file references `client`; presumably
# constructing it needs API credentials to be configured — confirm whether it
# is still required here.
client = OpenAI()


def evaluate_submission(day: int, model: str):
    """Run one model's solution for a given day and capture its output.

    The solution is executed with ``python`` from inside the ``day{day:02d}``
    directory. The working directory is switched for the duration of the call
    and is always restored afterwards, including on the early return for a
    missing file and when an exception propagates.

    Args:
        day: Puzzle day; selects the ``day{day:02d}`` directory.
        model: Model name, forwarded to ``get_solution_file_path``.

    Returns:
        ``None`` when the solution file does not exist. Otherwise a dict with:
        ``"result"``: stdout if the process exited with code 0, else
        ``"Error: <stderr>"`` (a timeout yields ``"Error: Timeout"``);
        ``"total_time"``: wall-clock seconds spent running the solution.
    """
    timeout = 60 * 5  # seconds allowed per solution

    # Remember where we started so the cwd can be restored on every exit path;
    # leaving the process inside dayXX would break the next call's chdir.
    previous_dir = os.getcwd()
    os.chdir(f"day{day:02d}")
    try:
        # get the solution file path, check if it exists
        file_path = get_solution_file_path(model=model)
        if not os.path.exists(file_path):
            print(f"File {file_path} does not exist, skipping")
            return None
        print(f"Evaluating {file_path} for day {day} with model {model}")

        # run the solution, and capture the output
        command = ["python", file_path]
        start_time = time.time()
        try:
            result = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
            print(f"Result: {result.stdout}")
        except subprocess.TimeoutExpired:
            # Represent a timeout as a failed run so it is reported as an error below.
            result = subprocess.CompletedProcess(args=command, returncode=1, stdout="", stderr="Timeout")
            print(f"Timeout after {timeout} seconds")
        total_time = time.time() - start_time
    finally:
        os.chdir(previous_dir)

    output = result.stdout if result.returncode == 0 else f"Error: {result.stderr}"

    return {
        "result": output,
        "total_time": total_time,
    }


def get_solution_code(day: int, model: str) -> str:
    """Read the solution file for ``day`` and ``model`` and return its full text."""
    solution_path = get_solution_file_path(day=day, model=model)
    with open(solution_path, "r") as handle:
        source = handle.read()
    return source


def extract_solutions(df, output_file="solutions.json", reference_model="jerpint"):
    """Build the reference answers from one model's results and save them as JSON.

    For every day that has at least one row for ``reference_model``, the first
    such row's ``result`` is split into its two output lines (part 1, part 2).
    Days without a reference row are skipped instead of raising, so the
    function works on partially evaluated results.

    Args:
        df: Results frame with ``day``, ``model`` and ``result`` columns.
        output_file: Path of the JSON file to write.
        reference_model: Model whose outputs are taken as the correct answers.

    Returns:
        Dict mapping day (int) to ``[part1, part2]``. In the JSON file the
        keys are serialized as strings.

    Raises:
        ValueError: If a reference result does not consist of exactly two lines.
    """
    reference = df[df.model == reference_model]

    solutions = {}
    for day in sorted(reference.day.unique()):
        output = reference[reference.day == day].result.to_list()[0]
        answers = output.strip("\n").split("\n")
        if len(answers) != 2:
            raise ValueError(
                f"Expected 2 output lines for day {day} from {reference_model}, got {len(answers)}"
            )
        # Cast to a plain int: numpy integer keys cannot be serialized by json.
        solutions[int(day)] = answers

    with open(output_file, "w") as f:
        json.dump(solutions, f, indent=2)

    return solutions


def evaluate_submissions(all_models, results_file="results.csv", skip=True, days=range(1, 11)):
    """Run every model's solution for each day and collect the results.

    Existing results are loaded from ``results_file`` when it exists, and the
    file is rewritten after every newly evaluated submission so that progress
    survives an interruption.

    Args:
        all_models: Mapping of provider name to a list of model names.
        results_file: CSV file used both to load previous results and to save
            new ones.
        skip: When true, (day, model) pairs already present in the results are
            not re-run. When false they are re-run and their old rows replaced.
        days: Iterable of day numbers to evaluate.

    Returns:
        DataFrame with columns ``day``, ``model``, ``result``, ``total_time``.
    """
    if os.path.exists(results_file):
        df = pd.read_csv(results_file)
    else:
        df = pd.DataFrame(columns=["day", "model", "result", "total_time"])

    for day in days:
        print("*" * 80)
        print(f"Evaluating day {day}")
        for provider, models in all_models.items():
            for model in models:
                print("-" * 80)

                existing = (df["day"] == day) & (df["model"] == model)
                if skip and existing.any():
                    print(f"Skipping {provider} {model} for day {day} because it already exists")
                    continue

                print(f"Evaluating day {day} with model {model}")
                result = evaluate_submission(day, model)
                if result is None:
                    # No solution file for this (day, model): nothing to record.
                    print("-" * 80)
                    continue

                new_row = pd.DataFrame(
                    {
                        "day": [day],
                        "model": [model],
                        "result": [result["result"]],
                        "total_time": [result["total_time"]],
                    }
                )
                # Drop stale rows first so a re-run (skip=False) replaces the
                # previous result rather than duplicating it.
                df = pd.concat([df[~existing], new_row], ignore_index=True)

                # Persist to the same file the results were loaded from.
                df.to_csv(results_file, index=False)
                print("-" * 80)
        print("*" * 80)
    return df



def score_submissions(row, solutions):
    """Score one results row against the reference answers.

    Args:
        row: Mapping-like row with ``"result"`` (captured program output) and
            ``"day"`` entries.
        solutions: Mapping of day (as a string) to ``[part1, part2]`` answers.

    Returns:
        ``[part_1_correct, part_2_correct]``; each flag is true when the
        corresponding answer appears as a substring of the output.
    """
    result = row["result"]
    # An empty output comes back from the CSV as NaN (a float), and a purely
    # numeric column may be parsed as numbers; normalise to text so the
    # substring checks below cannot raise TypeError.
    result = "" if pd.isna(result) else str(result)

    answers = solutions[str(row["day"])]
    return [answers[0] in result, answers[1] in result]


if __name__ == "__main__":
    all_models["human"] = ["jerpint"]
    df = evaluate_submissions(all_models, results_file="results.csv")

    # For now, only evaluate first 9 days
    # TODO: All days
    # Copy so the column assignments below write to an independent frame
    # instead of a filtered view of the full results.
    df = df[df.day < 10].copy()

    # Run once to save results
    # solutions = extract_solutions(df)

    with open("solutions.json") as f:
        solutions = json.load(f)

    df["scores"] = df.apply(lambda row: score_submissions(row, solutions), axis=1)

    df["part_1"] = df["scores"].apply(lambda x: x[0])
    df["part_2"] = df["scores"].apply(lambda x: x[1])

    # One star per correctly solved part; report the total per model.
    for model in df.model.unique():
        df_model = df[df.model == model]
        silver_stars = df_model.part_1.sum()
        gold_stars = df_model.part_2.sum()
        total_stars = silver_stars + gold_stars

        print(model, total_stars)