|
task,metric,value,err,version
|
|
anli_r1,acc,0.306,0.014580006055436969,0
|
|
anli_r2,acc,0.33,0.014876872027456734,0
|
|
anli_r3,acc,0.3308333333333333,0.013588208070709007,0
|
|
arc_challenge,acc,0.29948805460750855,0.013385021637313565,0
|
|
arc_challenge,acc_norm,0.3148464163822526,0.01357265770308495,0
|
|
arc_easy,acc,0.6388888888888888,0.00985601342581124,0
|
|
arc_easy,acc_norm,0.6182659932659933,0.009968648851839672,0
|
|
boolq,acc,0.5889908256880734,0.008605429733982185,1
|
|
cb,acc,0.25,0.058387420812114225,1
|
|
cb,f1,0.2376010151606224,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.47679745070703045,0.004984405935541087,0
|
|
hellaswag,acc_norm,0.6308504282015535,0.004815882719278393,0
|
|
piqa,acc,0.750272034820457,0.010099232969867488,0
|
|
piqa,acc_norm,0.763873775843308,0.009908965890558218,0
|
|
rte,acc,0.48736462093862815,0.030086851767188564,0
|
|
sciq,acc,0.902,0.009406619184621238,0
|
|
sciq,acc_norm,0.89,0.009899393819724444,0
|
|
storycloze_2016,acc,0.7199358631747729,0.01038376499392048,0
|
|
winogrande,acc,0.6101026045777427,0.013707547317008462,0
|
|
|