[
  {
    "agent_name": "GenericAgent-Claude-3.5-Sonnet",
    "study_id": "d93a2398-2b70-41ce-b989-364fed988d73",
    "benchmark": "AssistantBench",
    "score": 5.2,
    "std_err": 1.5,
    "benchmark_specific": "No",
    "benchmark_tuned": "No",
    "followed_evaluation_protocol": "Yes",
    "reproducible": "Yes",
    "comments": "Intersection of finished tasks across agents.",
    "original_or_reproduced": "Original",
    "date_time": "2024-11-28 19:34:58"
  }
]