{
"results": {
"anli_r1": {
"acc": 0.314,
"acc_stderr": 0.014683991951087962
},
"anli_r2": {
"acc": 0.342,
"acc_stderr": 0.015008706182121734
},
"anli_r3": {
"acc": 0.32416666666666666,
"acc_stderr": 0.013517438120881636
},
"cb": {
"acc": 0.4107142857142857,
"acc_stderr": 0.0663363415035954,
"f1": 0.37437732746529967
},
"copa": {
"acc": 0.79,
"acc_stderr": 0.040936018074033256
},
"hellaswag": {
"acc": 0.4823740290778729,
"acc_stderr": 0.004986680048438317,
"acc_norm": 0.6320454092810197,
"acc_norm_stderr": 0.004812633280078256
},
"rte": {
"acc": 0.5306859205776173,
"acc_stderr": 0.030039730592197812
},
"winogrande": {
"acc": 0.5887924230465666,
"acc_stderr": 0.013829128358676878
},
"storycloze_2016": {
"acc": 0.7215392838054516,
"acc_stderr": 0.010365521460604417
},
"boolq": {
"acc": 0.599388379204893,
"acc_stderr": 0.008570545612096372
},
"arc_easy": {
"acc": 0.6342592592592593,
"acc_stderr": 0.00988298806941883,
"acc_norm": 0.6212121212121212,
"acc_norm_stderr": 0.00995373765654204
},
"arc_challenge": {
"acc": 0.29180887372013653,
"acc_stderr": 0.013284525292403503,
"acc_norm": 0.3046075085324232,
"acc_norm_stderr": 0.01344952210993249
},
"sciq": {
"acc": 0.917,
"acc_stderr": 0.00872852720607479,
"acc_norm": 0.902,
"acc_norm_stderr": 0.009406619184621236
},
"piqa": {
"acc": 0.7600652883569097,
"acc_stderr": 0.009963625892809544,
"acc_norm": 0.7633297062023939,
"acc_norm_stderr": 0.009916841655042809
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}