# eval-leaderboard/data/populate_results.py
import json


def get_log_url(model_name: str, log_file_name: str | None) -> str | None:
    """Return the URL of the log viewer for a given model and benchmark."""
    if log_file_name is None:
        return None
    # The viewer expects the .eval log, not the raw .json results file.
    log_file_name = log_file_name.replace(".json", ".eval")
    return f"https://storage.googleapis.com/inspect-evals/eval/{model_name}/index.html?log_file=logs/logs/{log_file_name}"


def main():
    # Load the benchmark results and the per-task Inspect log file names.
    with open("data/results.json", "r") as f:
        results = json.load(f)
    with open("data/inspect_log_file_names.json", "r") as f:
        log_files = json.load(f)
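
    # Shapes inferred from the loops below (an assumption, not a documented
    # schema):
    #   results:   {model_name: {"results": {task_name: task_data, ...}, ...}}
    #   log_files: {model_name: {task_name: log_file_name, ...}}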

    # Attach a log-viewer URL to every task entry of every model.
    for model_name, model_data in results.items():
        # Log file names for this model; empty dict if the model has none.
        model_logs = log_files.get(model_name, {})
        for task_name, task_data in model_data["results"].items():
            log_file_name = model_logs.get(task_name)
            # Add the viewer URL, or None when no log file is known.
            task_data["log_url"] = (
                get_log_url(model_name, log_file_name) if log_file_name else None
            )

    # Save the augmented results alongside the originals.
    with open("data/results_with_logs.json", "w") as f:
        json.dump(results, f, indent=4)


if __name__ == "__main__":
    main()
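
# Presumably run from the repository root so the relative "data/" paths
# resolve, e.g.:
#   python data/populate_results.py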