|
[ |
|
{ |
|
"agent_name": "GenericAgent-GPT-o1-mini", |
|
"study_id": "2024-10-23_14-17-40", |
|
"date_time": "2024-10-23 22:30:06", |
|
"benchmark": "WorkArena-L1", |
|
"score": 56.7, |
|
"std_err": 2.7, |
|
"benchmark_specific": "No", |
|
"benchmark_tuned": "No", |
|
"followed_evaluation_protocol": "Yes", |
|
"reproducible": "Yes", |
|
"comments": "NA", |
|
"original_or_reproduced": "Original" |
|
}, |
|
{ |
|
"agent_name": "GenericAgent-GPT-o1-mini", |
|
"study_id": "f3e1fcb8-5fc5-4115-9e00-27251508e2c7", |
|
"date_time": "2025-02-07 14:00:00", |
|
"benchmark": "WorkArena-L1", |
|
"score": 51.8, |
|
"std_err": 2.80, |
|
"benchmark_specific": "No", |
|
"benchmark_tuned": "No", |
|
"followed_evaluation_protocol": "Yes", |
|
"reproducible": "Yes", |
|
"comments": "Additional details", |
|
"original_or_reproduced": "Reproduced" |
|
} |
|
] |