task,metric,value,err,version anli_r1,acc,0.331,0.014888272588203931,0 anli_r2,acc,0.342,0.01500870618212173,0 anli_r3,acc,0.34,0.013680495725767784,0 arc_challenge,acc,0.27986348122866894,0.013119040897725922,0 arc_challenge,acc_norm,0.29266211604095566,0.01329591610361942,0 arc_easy,acc,0.6224747474747475,0.00994722783346943,0 arc_easy,acc_norm,0.5462962962962963,0.010215708295494117,0 boolq,acc,0.5253822629969419,0.0087337795418535,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.45393112410656267,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4833698466440948,0.004987020679861267,0 hellaswag,acc_norm,0.63433578968333,0.004806316342709393,0 piqa,acc,0.7448313384113167,0.010171571592521822,0 piqa,acc_norm,0.76550598476605,0.00988520314324054,0 rte,acc,0.5776173285198556,0.029731622646495887,0 sciq,acc,0.837,0.011686212712746849,0 sciq,acc_norm,0.757,0.013569640199177458,0 storycloze_2016,acc,0.7204703367183325,0.01037770209970486,0 winogrande,acc,0.5864246250986582,0.013840971763195303,0