task,metric,value,err,version anli_r1,acc,0.306,0.014580006055436969,0 anli_r2,acc,0.33,0.014876872027456734,0 anli_r3,acc,0.3308333333333333,0.013588208070709007,0 arc_challenge,acc,0.29948805460750855,0.013385021637313565,0 arc_challenge,acc_norm,0.3148464163822526,0.01357265770308495,0 arc_easy,acc,0.6388888888888888,0.00985601342581124,0 arc_easy,acc_norm,0.6182659932659933,0.009968648851839672,0 boolq,acc,0.5889908256880734,0.008605429733982185,1 cb,acc,0.25,0.058387420812114225,1 cb,f1,0.2376010151606224,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.47679745070703045,0.004984405935541087,0 hellaswag,acc_norm,0.6308504282015535,0.004815882719278393,0 piqa,acc,0.750272034820457,0.010099232969867488,0 piqa,acc_norm,0.763873775843308,0.009908965890558218,0 rte,acc,0.48736462093862815,0.030086851767188564,0 sciq,acc,0.902,0.009406619184621238,0 sciq,acc_norm,0.89,0.009899393819724444,0 storycloze_2016,acc,0.7199358631747729,0.01038376499392048,0 winogrande,acc,0.6101026045777427,0.013707547317008462,0