# Extracted from a hosted file view (reported file size: 4,595 bytes).
# The page status labels, per-line commit hashes and line-number gutter that
# preceded the code were extraction residue and are not part of the program.
import os
import json
import subprocess
import pandas as pd
from generate import get_solution_file_path, all_models
import time
import os
import subprocess
def evaluate_submission(day: int, model: str, timeout=60 * 5):
    """Run one model's solution for a given day and capture its output.

    The solution is executed with ``python`` in a subprocess, from inside the
    ``day{day:02d}`` directory. The submission is not scored here (e.g. no
    star is rewarded); that comes later.

    Args:
        day: Day number; selects the ``day{day:02d}`` working directory.
        model: Model name, forwarded to ``get_solution_file_path``.
        timeout: Seconds after which the subprocess is halted, in case
            infinite loops arise.

    Returns:
        ``None`` when the solution file does not exist. Otherwise a dict with
        ``"result"`` (the captured stdout on a zero return code, else
        ``"Error: <stderr>"``, which is ``"Error: Timeout"`` on a timeout) and
        ``"total_time"`` (wall-clock seconds spent running the solution).

    Raises:
        OSError: if the day directory cannot be entered.
    """
    # Remember where we started so the caller's working directory is always
    # restored, including on the "file missing" early return and on errors.
    original_cwd = os.getcwd()
    os.chdir(f"day{day:02d}")
    try:
        # get the solution file path, check if it exists
        file_path = get_solution_file_path(model=model)
        if not os.path.exists(file_path):
            print(f"File {file_path} does not exist, skipping")
            return None
        print(f"Evaluating {file_path} for day {day} with model {model}")

        # run the solution, and capture the output
        start_time = time.time()
        try:
            result = subprocess.run(
                ["python", file_path],
                capture_output=True,
                text=True,
                timeout=timeout,
            )
            print(f"Result: {result.stdout}")
        except subprocess.TimeoutExpired:
            # Represent the timeout as a failed run so it is reported through
            # the same "Error: ..." path as any other failure.
            result = subprocess.CompletedProcess(
                args=["python", file_path],
                returncode=1,
                stdout="",
                stderr="Timeout",
            )
            print(f"Timeout after {timeout} seconds")
        total_time = time.time() - start_time
    finally:
        os.chdir(original_cwd)

    output = result.stdout if result.returncode == 0 else f"Error: {result.stderr}"
    return {
        "result": output,
        "total_time": total_time,
    }
def get_solution_code(day: int, model: str) -> str:
    """Read the solution file for the given day and model and return its text."""
    solution_path = get_solution_file_path(day=day, model=model)
    with open(solution_path, "r") as handle:
        source_code = handle.read()
    return source_code
def extract_solutions(df, model: str, output_file="solutions.json") -> dict:
    """Collect one model's outputs as 'ground truth' answers for days 1-25.

    For each day, the first matching row's ``result`` is split on newlines
    into a part 1 and a part 2 answer. These can then be used to score other
    models. The mapping is also saved to ``output_file`` in .json format.

    Args:
        df: DataFrame with ``day``, ``model`` and ``result`` columns.
        model: Name of the model whose results are taken as the reference.
        output_file: Path of the JSON file to write.

    Returns:
        Dict mapping each day (int, 1..25) to ``[part1, part2]``. A part is
        ``"N/A"`` when it is missing: no row for that day, a null or empty
        result, only one output line, or more than two output lines.
    """
    solutions = {}
    for day in range(1, 26):
        sub_df = df[(df["model"] == model) & (df["day"] == day)]
        raw_results = sub_df["result"].to_list()
        raw = raw_results[0] if raw_results else None

        # A missing row, or a null result (e.g. an empty stdout read back from
        # CSV), has no answer lines at all.
        text = "" if pd.isna(raw) else str(raw).strip("\n")
        # "".split("\n") would yield [""], so map empty output to no lines.
        day_solution = text.split("\n") if text else []

        if not day_solution:
            part1 = "N/A"
            part2 = "N/A"
        elif len(day_solution) == 1:
            part1 = day_solution[0]
            part2 = "N/A"
        elif len(day_solution) == 2:
            part1, part2 = day_solution
        else:
            print(f"Something went wrong, check day {day} solution: \n {day_solution}")
            part1 = "N/A"
            part2 = "N/A"
        solutions[day] = [part1, part2]

    with open(output_file, "w") as f:
        json.dump(solutions, f, indent=2)
    return solutions
def evaluate_submissions(all_models, results_file="results.csv", skip=True):
    """Run every model's solution for days 1-25 and collect the results.

    Existing results are loaded from ``results_file`` when it exists. Results
    are written back to ``results_file`` after every evaluation, and once
    more, sorted by day, at the end.

    Args:
        all_models: Mapping of provider name to an iterable of model names.
        results_file: CSV path used both to load earlier results and to save.
        skip: When true, (day, model) pairs already present in the results
            are not evaluated again.

    Returns:
        DataFrame with columns ``day``, ``model``, ``result`` and
        ``total_time``, sorted by ``day``.
    """
    if os.path.exists(results_file):
        df = pd.read_csv(results_file)
    else:
        df = pd.DataFrame(columns=["day", "model", "result", "total_time"])

    # evaluate_submission changes the working directory; remember the starting
    # one so a relative results_file is always written in the same place.
    root_dir = os.getcwd()

    for day in range(1, 26):
        print("*" * 80)
        print(f"Evaluating day {day}")
        for provider in all_models:
            for model in all_models[provider]:
                print("-" * 80)
                already_done = ((df["day"] == day) & (df["model"] == model)).any()
                if already_done and skip:
                    print(f"Skipping {provider} {model} for day {day} because it already exists")
                    continue

                print(f"Evaluating day {day} with model {model}")
                try:
                    result = evaluate_submission(day, model)
                finally:
                    os.chdir(root_dir)

                # evaluate_submission returns None when the solution file is
                # missing; there is nothing to record in that case.
                if result is None:
                    continue

                new_row = pd.DataFrame({
                    "day": [day],
                    "model": [model],
                    "result": [result["result"]],
                    "total_time": [result["total_time"]],
                })
                df = pd.concat([df, new_row], ignore_index=True)

                # Save incrementally
                df.to_csv(results_file, index=False)
                print("-" * 80)
        print("*" * 80)

    df = df.sort_values(by="day")
    df.to_csv(results_file, index=False)
    return df
if __name__ == "__main__":
    # Add my submissions to the list of available models, for convenience
    all_models["human"] = ["jerpint"]

    # Collects all outputs from running the python code
    df = evaluate_submissions(all_models, results_file="results.csv")

    # Extracts solutions, using the "jerpint" results as the ground truth
    solutions = extract_solutions(df, output_file="solutions.json", model="jerpint")