task,metric,value,err,version anli_r1,acc,0.323,0.014794927843348639,0 anli_r2,acc,0.332,0.014899597242811475,0 anli_r3,acc,0.3275,0.013553211167251961,0 arc_challenge,acc,0.29948805460750855,0.013385021637313565,0 arc_challenge,acc_norm,0.31313993174061433,0.013552671543623504,0 arc_easy,acc,0.6426767676767676,0.00983320561246312,0 arc_easy,acc_norm,0.625,0.009933992677987828,0 boolq,acc,0.618960244648318,0.008493937524439337,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.32470238095238096,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4792869946225851,0.004985498055190358,0 hellaswag,acc_norm,0.6384186417048396,0.004794764843685288,0 piqa,acc,0.7557127312295974,0.010024765172284247,0 piqa,acc_norm,0.7616974972796517,0.009940334245876222,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.919,0.008632121032139964,0 sciq,acc_norm,0.911,0.009008893392651526,0 storycloze_2016,acc,0.7306253340459647,0.010258997754057014,0 winogrande,acc,0.5880031570639306,0.013833112857645937,0