File size: 944 Bytes
e80279f d5581cc 52facf3 e80279f f4d95d8 e80279f a35cef0 e80279f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
[
{
"agent_name": "GenericAgent-GPT-o1-mini",
"study_id": "2024-10-23_14-17-40",
"date_time": "2024-10-23 22:30:06",
"benchmark": "WorkArena-L1",
"score": 56.7,
"std_err": 2.7,
"benchmark_specific": "No",
"benchmark_tuned": "No",
"followed_evaluation_protocol": "Yes",
"reproducible": "Yes",
"comments": "NA",
"original_or_reproduced": "Original"
},
{
"agent_name": "GenericAgent-GPT-o1-mini",
"study_id": "f3e1fcb8-5fc5-4115-9e00-27251508e2c7",
"date_time": "2025-02-07 14:00:00",
"benchmark": "WorkArena-L1",
"score": 51.8,
"std_err": 2.80,
"benchmark_specific": "No",
"benchmark_tuned": "No",
"followed_evaluation_protocol": "Yes",
"reproducible": "Yes",
"comments": "Additional details",
"original_or_reproduced": "Reproduced"
}
] |