MLLM_leaderboard/eval-results/JosephusCheung/Yee-34B-200K-Chat/results_2023-12-05T04-15-54.776905.json
{
"config_general": {
"lighteval_sha": "0e4607eff593f6f842aeaa0e5fa6760f58b9d1e9",
"num_few_shot_default": 0,
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": "",
"start_time": 169672.104131814,
"end_time": 204633.837438729,
"total_evaluation_time_secondes": "34961.733306915005",
"model_name": "JosephusCheung/Yee-34B-200K-Chat",
"model_sha": "94bc30449e41628f59dd965cb7d9a8eb53ce9a45",
"model_dtype": "torch.bfloat16",
"model_size": "64.17 GB"
},
"results": {
"harness|arc:challenge|25": {
"acc": 0.6254266211604096,
"acc_stderr": 0.014144193471893446,
"acc_norm": 0.6561433447098977,
"acc_norm_stderr": 0.013880644570156218
},
"harness|hellaswag|10": {
"acc": 0.6506671977693687,
"acc_stderr": 0.0047578490234119605,
"acc_norm": 0.8432583150766779,
"acc_norm_stderr": 0.003628140427399768
},
"harness|hendrycksTest-abstract_algebra|5": {
"acc": 0.44,
"acc_stderr": 0.04988876515698589,
"acc_norm": 0.44,
"acc_norm_stderr": 0.04988876515698589
},
"harness|hendrycksTest-anatomy|5": {
"acc": 0.7333333333333333,
"acc_stderr": 0.038201699145179055,
"acc_norm": 0.7333333333333333,
"acc_norm_stderr": 0.038201699145179055
},
"harness|hendrycksTest-astronomy|5": {
"acc": 0.875,
"acc_stderr": 0.026913523521537846,
"acc_norm": 0.875,
"acc_norm_stderr": 0.026913523521537846
},
"harness|hendrycksTest-business_ethics|5": {
"acc": 0.79,
"acc_stderr": 0.040936018074033256,
"acc_norm": 0.79,
"acc_norm_stderr": 0.040936018074033256
},
"harness|hendrycksTest-clinical_knowledge|5": {
"acc": 0.8301886792452831,
"acc_stderr": 0.023108393799841326,
"acc_norm": 0.8301886792452831,
"acc_norm_stderr": 0.023108393799841326
},
"harness|hendrycksTest-college_biology|5": {
"acc": 0.875,
"acc_stderr": 0.02765610492929436,
"acc_norm": 0.875,
"acc_norm_stderr": 0.02765610492929436
},
"harness|hendrycksTest-college_chemistry|5": {
"acc": 0.5,
"acc_stderr": 0.050251890762960605,
"acc_norm": 0.5,
"acc_norm_stderr": 0.050251890762960605
},
"harness|hendrycksTest-college_computer_science|5": {
"acc": 0.58,
"acc_stderr": 0.049604496374885836,
"acc_norm": 0.58,
"acc_norm_stderr": 0.049604496374885836
},
"harness|hendrycksTest-college_mathematics|5": {
"acc": 0.37,
"acc_stderr": 0.04852365870939098,
"acc_norm": 0.37,
"acc_norm_stderr": 0.04852365870939098
},
"harness|hendrycksTest-college_medicine|5": {
"acc": 0.6878612716763006,
"acc_stderr": 0.03533133389323657,
"acc_norm": 0.6878612716763006,
"acc_norm_stderr": 0.03533133389323657
},
"harness|hendrycksTest-college_physics|5": {
"acc": 0.4411764705882353,
"acc_stderr": 0.04940635630605659,
"acc_norm": 0.4411764705882353,
"acc_norm_stderr": 0.04940635630605659
},
"harness|hendrycksTest-computer_security|5": {
"acc": 0.81,
"acc_stderr": 0.039427724440366234,
"acc_norm": 0.81,
"acc_norm_stderr": 0.039427724440366234
},
"harness|hendrycksTest-conceptual_physics|5": {
"acc": 0.7617021276595745,
"acc_stderr": 0.027851252973889774,
"acc_norm": 0.7617021276595745,
"acc_norm_stderr": 0.027851252973889774
},
"harness|hendrycksTest-econometrics|5": {
"acc": 0.5526315789473685,
"acc_stderr": 0.04677473004491199,
"acc_norm": 0.5526315789473685,
"acc_norm_stderr": 0.04677473004491199
},
"harness|hendrycksTest-electrical_engineering|5": {
"acc": 0.7517241379310344,
"acc_stderr": 0.03600105692727771,
"acc_norm": 0.7517241379310344,
"acc_norm_stderr": 0.03600105692727771
},
"harness|hendrycksTest-elementary_mathematics|5": {
"acc": 0.6375661375661376,
"acc_stderr": 0.024757473902752045,
"acc_norm": 0.6375661375661376,
"acc_norm_stderr": 0.024757473902752045
},
"harness|hendrycksTest-formal_logic|5": {
"acc": 0.5158730158730159,
"acc_stderr": 0.044698818540726076,
"acc_norm": 0.5158730158730159,
"acc_norm_stderr": 0.044698818540726076
},
"harness|hendrycksTest-global_facts|5": {
"acc": 0.56,
"acc_stderr": 0.049888765156985884,
"acc_norm": 0.56,
"acc_norm_stderr": 0.049888765156985884
},
"harness|hendrycksTest-high_school_biology|5": {
"acc": 0.8612903225806452,
"acc_stderr": 0.019662961321414027,
"acc_norm": 0.8612903225806452,
"acc_norm_stderr": 0.019662961321414027
},
"harness|hendrycksTest-high_school_chemistry|5": {
"acc": 0.6206896551724138,
"acc_stderr": 0.034139638059062345,
"acc_norm": 0.6206896551724138,
"acc_norm_stderr": 0.034139638059062345
},
"harness|hendrycksTest-high_school_computer_science|5": {
"acc": 0.79,
"acc_stderr": 0.040936018074033256,
"acc_norm": 0.79,
"acc_norm_stderr": 0.040936018074033256
},
"harness|hendrycksTest-high_school_european_history|5": {
"acc": 0.8787878787878788,
"acc_stderr": 0.02548549837334323,
"acc_norm": 0.8787878787878788,
"acc_norm_stderr": 0.02548549837334323
},
"harness|hendrycksTest-high_school_geography|5": {
"acc": 0.9040404040404041,
"acc_stderr": 0.020984808610047926,
"acc_norm": 0.9040404040404041,
"acc_norm_stderr": 0.020984808610047926
},
"harness|hendrycksTest-high_school_government_and_politics|5": {
"acc": 0.9689119170984456,
"acc_stderr": 0.012525310625527046,
"acc_norm": 0.9689119170984456,
"acc_norm_stderr": 0.012525310625527046
},
"harness|hendrycksTest-high_school_macroeconomics|5": {
"acc": 0.7794871794871795,
"acc_stderr": 0.0210206726808279,
"acc_norm": 0.7794871794871795,
"acc_norm_stderr": 0.0210206726808279
},
"harness|hendrycksTest-high_school_mathematics|5": {
"acc": 0.37037037037037035,
"acc_stderr": 0.02944316932303154,
"acc_norm": 0.37037037037037035,
"acc_norm_stderr": 0.02944316932303154
},
"harness|hendrycksTest-high_school_microeconomics|5": {
"acc": 0.819327731092437,
"acc_stderr": 0.02499196496660077,
"acc_norm": 0.819327731092437,
"acc_norm_stderr": 0.02499196496660077
},
"harness|hendrycksTest-high_school_physics|5": {
"acc": 0.48344370860927155,
"acc_stderr": 0.0408024418562897,
"acc_norm": 0.48344370860927155,
"acc_norm_stderr": 0.0408024418562897
},
"harness|hendrycksTest-high_school_psychology|5": {
"acc": 0.9137614678899083,
"acc_stderr": 0.012035597300116245,
"acc_norm": 0.9137614678899083,
"acc_norm_stderr": 0.012035597300116245
},
"harness|hendrycksTest-high_school_statistics|5": {
"acc": 0.625,
"acc_stderr": 0.033016908987210894,
"acc_norm": 0.625,
"acc_norm_stderr": 0.033016908987210894
},
"harness|hendrycksTest-high_school_us_history|5": {
"acc": 0.9117647058823529,
"acc_stderr": 0.019907399791316945,
"acc_norm": 0.9117647058823529,
"acc_norm_stderr": 0.019907399791316945
},
"harness|hendrycksTest-high_school_world_history|5": {
"acc": 0.9156118143459916,
"acc_stderr": 0.01809424711647332,
"acc_norm": 0.9156118143459916,
"acc_norm_stderr": 0.01809424711647332
},
"harness|hendrycksTest-human_aging|5": {
"acc": 0.8116591928251121,
"acc_stderr": 0.026241132996407256,
"acc_norm": 0.8116591928251121,
"acc_norm_stderr": 0.026241132996407256
},
"harness|hendrycksTest-human_sexuality|5": {
"acc": 0.9007633587786259,
"acc_stderr": 0.026222235171477374,
"acc_norm": 0.9007633587786259,
"acc_norm_stderr": 0.026222235171477374
},
"harness|hendrycksTest-international_law|5": {
"acc": 0.9008264462809917,
"acc_stderr": 0.02728524631275896,
"acc_norm": 0.9008264462809917,
"acc_norm_stderr": 0.02728524631275896
},
"harness|hendrycksTest-jurisprudence|5": {
"acc": 0.8888888888888888,
"acc_stderr": 0.03038159675665167,
"acc_norm": 0.8888888888888888,
"acc_norm_stderr": 0.03038159675665167
},
"harness|hendrycksTest-logical_fallacies|5": {
"acc": 0.8650306748466258,
"acc_stderr": 0.02684576505455386,
"acc_norm": 0.8650306748466258,
"acc_norm_stderr": 0.02684576505455386
},
"harness|hendrycksTest-machine_learning|5": {
"acc": 0.6160714285714286,
"acc_stderr": 0.04616143075028546,
"acc_norm": 0.6160714285714286,
"acc_norm_stderr": 0.04616143075028546
},
"harness|hendrycksTest-management|5": {
"acc": 0.8640776699029126,
"acc_stderr": 0.033932957297610096,
"acc_norm": 0.8640776699029126,
"acc_norm_stderr": 0.033932957297610096
},
"harness|hendrycksTest-marketing|5": {
"acc": 0.9145299145299145,
"acc_stderr": 0.01831589168562586,
"acc_norm": 0.9145299145299145,
"acc_norm_stderr": 0.01831589168562586
},
"harness|hendrycksTest-medical_genetics|5": {
"acc": 0.89,
"acc_stderr": 0.03144660377352203,
"acc_norm": 0.89,
"acc_norm_stderr": 0.03144660377352203
},
"harness|hendrycksTest-miscellaneous|5": {
"acc": 0.8978288633461047,
"acc_stderr": 0.010830724713134182,
"acc_norm": 0.8978288633461047,
"acc_norm_stderr": 0.010830724713134182
},
"harness|hendrycksTest-moral_disputes|5": {
"acc": 0.8092485549132948,
"acc_stderr": 0.02115267696657528,
"acc_norm": 0.8092485549132948,
"acc_norm_stderr": 0.02115267696657528
},
"harness|hendrycksTest-moral_scenarios|5": {
"acc": 0.7195530726256983,
"acc_stderr": 0.015024083883322895,
"acc_norm": 0.7195530726256983,
"acc_norm_stderr": 0.015024083883322895
},
"harness|hendrycksTest-nutrition|5": {
"acc": 0.8300653594771242,
"acc_stderr": 0.02150538312123138,
"acc_norm": 0.8300653594771242,
"acc_norm_stderr": 0.02150538312123138
},
"harness|hendrycksTest-philosophy|5": {
"acc": 0.8006430868167203,
"acc_stderr": 0.022691033780549656,
"acc_norm": 0.8006430868167203,
"acc_norm_stderr": 0.022691033780549656
},
"harness|hendrycksTest-prehistory|5": {
"acc": 0.8827160493827161,
"acc_stderr": 0.017903112615281123,
"acc_norm": 0.8827160493827161,
"acc_norm_stderr": 0.017903112615281123
},
"harness|hendrycksTest-professional_accounting|5": {
"acc": 0.6170212765957447,
"acc_stderr": 0.02899908090480618,
"acc_norm": 0.6170212765957447,
"acc_norm_stderr": 0.02899908090480618
},
"harness|hendrycksTest-professional_law|5": {
"acc": 0.5560625814863103,
"acc_stderr": 0.012689708167787679,
"acc_norm": 0.5560625814863103,
"acc_norm_stderr": 0.012689708167787679
},
"harness|hendrycksTest-professional_medicine|5": {
"acc": 0.8014705882352942,
"acc_stderr": 0.02423101337054109,
"acc_norm": 0.8014705882352942,
"acc_norm_stderr": 0.02423101337054109
},
"harness|hendrycksTest-professional_psychology|5": {
"acc": 0.8218954248366013,
"acc_stderr": 0.015478369653108568,
"acc_norm": 0.8218954248366013,
"acc_norm_stderr": 0.015478369653108568
},
"harness|hendrycksTest-public_relations|5": {
"acc": 0.7090909090909091,
"acc_stderr": 0.04350271442923243,
"acc_norm": 0.7090909090909091,
"acc_norm_stderr": 0.04350271442923243
},
"harness|hendrycksTest-security_studies|5": {
"acc": 0.8367346938775511,
"acc_stderr": 0.023661699177098615,
"acc_norm": 0.8367346938775511,
"acc_norm_stderr": 0.023661699177098615
},
"harness|hendrycksTest-sociology|5": {
"acc": 0.8756218905472637,
"acc_stderr": 0.023335401790166327,
"acc_norm": 0.8756218905472637,
"acc_norm_stderr": 0.023335401790166327
},
"harness|hendrycksTest-us_foreign_policy|5": {
"acc": 0.88,
"acc_stderr": 0.032659863237109066,
"acc_norm": 0.88,
"acc_norm_stderr": 0.032659863237109066
},
"harness|hendrycksTest-virology|5": {
"acc": 0.5903614457831325,
"acc_stderr": 0.038284011150790206,
"acc_norm": 0.5903614457831325,
"acc_norm_stderr": 0.038284011150790206
},
"harness|hendrycksTest-world_religions|5": {
"acc": 0.8654970760233918,
"acc_stderr": 0.026168221344662297,
"acc_norm": 0.8654970760233918,
"acc_norm_stderr": 0.026168221344662297
},
"harness|truthfulqa:mc|0": {
"mc1": 0.379436964504284,
"mc1_stderr": 0.01698703926614299,
"mc2": 0.538842608150276,
"mc2_stderr": 0.015448158590971197
},
"harness|winogrande|5": {
"acc": 0.797947908445146,
"acc_stderr": 0.01128501375404745
},
"harness|gsm8k|5": {
"acc": 0.3479909021986353,
"acc_stderr": 0.013120581030382132
},
"all": {
"acc": 0.7397087702526806,
"acc_stderr": 0.028697152379174293,
"acc_norm": 0.749145830773331,
"acc_norm_stderr": 0.029232668522838182,
"mc1": 0.379436964504284,
"mc1_stderr": 0.01698703926614299,
"mc2": 0.538842608150276,
"mc2_stderr": 0.015448158590971197
}
},
"versions": {
"all": 0,
"harness|arc:challenge|25": 0,
"harness|gsm8k|5": 0,
"harness|hellaswag|10": 0,
"harness|hendrycksTest-abstract_algebra|5": 1,
"harness|hendrycksTest-anatomy|5": 1,
"harness|hendrycksTest-astronomy|5": 1,
"harness|hendrycksTest-business_ethics|5": 1,
"harness|hendrycksTest-clinical_knowledge|5": 1,
"harness|hendrycksTest-college_biology|5": 1,
"harness|hendrycksTest-college_chemistry|5": 1,
"harness|hendrycksTest-college_computer_science|5": 1,
"harness|hendrycksTest-college_mathematics|5": 1,
"harness|hendrycksTest-college_medicine|5": 1,
"harness|hendrycksTest-college_physics|5": 1,
"harness|hendrycksTest-computer_security|5": 1,
"harness|hendrycksTest-conceptual_physics|5": 1,
"harness|hendrycksTest-econometrics|5": 1,
"harness|hendrycksTest-electrical_engineering|5": 1,
"harness|hendrycksTest-elementary_mathematics|5": 1,
"harness|hendrycksTest-formal_logic|5": 1,
"harness|hendrycksTest-global_facts|5": 1,
"harness|hendrycksTest-high_school_biology|5": 1,
"harness|hendrycksTest-high_school_chemistry|5": 1,
"harness|hendrycksTest-high_school_computer_science|5": 1,
"harness|hendrycksTest-high_school_european_history|5": 1,
"harness|hendrycksTest-high_school_geography|5": 1,
"harness|hendrycksTest-high_school_government_and_politics|5": 1,
"harness|hendrycksTest-high_school_macroeconomics|5": 1,
"harness|hendrycksTest-high_school_mathematics|5": 1,
"harness|hendrycksTest-high_school_microeconomics|5": 1,
"harness|hendrycksTest-high_school_physics|5": 1,
"harness|hendrycksTest-high_school_psychology|5": 1,
"harness|hendrycksTest-high_school_statistics|5": 1,
"harness|hendrycksTest-high_school_us_history|5": 1,
"harness|hendrycksTest-high_school_world_history|5": 1,
"harness|hendrycksTest-human_aging|5": 1,
"harness|hendrycksTest-human_sexuality|5": 1,
"harness|hendrycksTest-international_law|5": 1,
"harness|hendrycksTest-jurisprudence|5": 1,
"harness|hendrycksTest-logical_fallacies|5": 1,
"harness|hendrycksTest-machine_learning|5": 1,
"harness|hendrycksTest-management|5": 1,
"harness|hendrycksTest-marketing|5": 1,
"harness|hendrycksTest-medical_genetics|5": 1,
"harness|hendrycksTest-miscellaneous|5": 1,
"harness|hendrycksTest-moral_disputes|5": 1,
"harness|hendrycksTest-moral_scenarios|5": 1,
"harness|hendrycksTest-nutrition|5": 1,
"harness|hendrycksTest-philosophy|5": 1,
"harness|hendrycksTest-prehistory|5": 1,
"harness|hendrycksTest-professional_accounting|5": 1,
"harness|hendrycksTest-professional_law|5": 1,
"harness|hendrycksTest-professional_medicine|5": 1,
"harness|hendrycksTest-professional_psychology|5": 1,
"harness|hendrycksTest-public_relations|5": 1,
"harness|hendrycksTest-security_studies|5": 1,
"harness|hendrycksTest-sociology|5": 1,
"harness|hendrycksTest-us_foreign_policy|5": 1,
"harness|hendrycksTest-virology|5": 1,
"harness|hendrycksTest-world_religions|5": 1,
"harness|truthfulqa:mc|0": 1,
"harness|winogrande|5": 0
},
"config_tasks": {
"harness|arc:challenge": "LM Harness task",
"harness|gsm8k": "LM Harness task",
"harness|hellaswag": "LM Harness task",
"harness|hendrycksTest-abstract_algebra": "LM Harness task",
"harness|hendrycksTest-anatomy": "LM Harness task",
"harness|hendrycksTest-astronomy": "LM Harness task",
"harness|hendrycksTest-business_ethics": "LM Harness task",
"harness|hendrycksTest-clinical_knowledge": "LM Harness task",
"harness|hendrycksTest-college_biology": "LM Harness task",
"harness|hendrycksTest-college_chemistry": "LM Harness task",
"harness|hendrycksTest-college_computer_science": "LM Harness task",
"harness|hendrycksTest-college_mathematics": "LM Harness task",
"harness|hendrycksTest-college_medicine": "LM Harness task",
"harness|hendrycksTest-college_physics": "LM Harness task",
"harness|hendrycksTest-computer_security": "LM Harness task",
"harness|hendrycksTest-conceptual_physics": "LM Harness task",
"harness|hendrycksTest-econometrics": "LM Harness task",
"harness|hendrycksTest-electrical_engineering": "LM Harness task",
"harness|hendrycksTest-elementary_mathematics": "LM Harness task",
"harness|hendrycksTest-formal_logic": "LM Harness task",
"harness|hendrycksTest-global_facts": "LM Harness task",
"harness|hendrycksTest-high_school_biology": "LM Harness task",
"harness|hendrycksTest-high_school_chemistry": "LM Harness task",
"harness|hendrycksTest-high_school_computer_science": "LM Harness task",
"harness|hendrycksTest-high_school_european_history": "LM Harness task",
"harness|hendrycksTest-high_school_geography": "LM Harness task",
"harness|hendrycksTest-high_school_government_and_politics": "LM Harness task",
"harness|hendrycksTest-high_school_macroeconomics": "LM Harness task",
"harness|hendrycksTest-high_school_mathematics": "LM Harness task",
"harness|hendrycksTest-high_school_microeconomics": "LM Harness task",
"harness|hendrycksTest-high_school_physics": "LM Harness task",
"harness|hendrycksTest-high_school_psychology": "LM Harness task",
"harness|hendrycksTest-high_school_statistics": "LM Harness task",
"harness|hendrycksTest-high_school_us_history": "LM Harness task",
"harness|hendrycksTest-high_school_world_history": "LM Harness task",
"harness|hendrycksTest-human_aging": "LM Harness task",
"harness|hendrycksTest-human_sexuality": "LM Harness task",
"harness|hendrycksTest-international_law": "LM Harness task",
"harness|hendrycksTest-jurisprudence": "LM Harness task",
"harness|hendrycksTest-logical_fallacies": "LM Harness task",
"harness|hendrycksTest-machine_learning": "LM Harness task",
"harness|hendrycksTest-management": "LM Harness task",
"harness|hendrycksTest-marketing": "LM Harness task",
"harness|hendrycksTest-medical_genetics": "LM Harness task",
"harness|hendrycksTest-miscellaneous": "LM Harness task",
"harness|hendrycksTest-moral_disputes": "LM Harness task",
"harness|hendrycksTest-moral_scenarios": "LM Harness task",
"harness|hendrycksTest-nutrition": "LM Harness task",
"harness|hendrycksTest-philosophy": "LM Harness task",
"harness|hendrycksTest-prehistory": "LM Harness task",
"harness|hendrycksTest-professional_accounting": "LM Harness task",
"harness|hendrycksTest-professional_law": "LM Harness task",
"harness|hendrycksTest-professional_medicine": "LM Harness task",
"harness|hendrycksTest-professional_psychology": "LM Harness task",
"harness|hendrycksTest-public_relations": "LM Harness task",
"harness|hendrycksTest-security_studies": "LM Harness task",
"harness|hendrycksTest-sociology": "LM Harness task",
"harness|hendrycksTest-us_foreign_policy": "LM Harness task",
"harness|hendrycksTest-virology": "LM Harness task",
"harness|hendrycksTest-world_religions": "LM Harness task",
"harness|truthfulqa:mc": "LM Harness task",
"harness|winogrande": "LM Harness task"
},
"summary_tasks": {
"harness|arc:challenge|25": {
"hashes": {
"hash_examples": "17b0cae357c0259e",
"hash_full_prompts": "045cbb916e5145c6",
"hash_input_tokens": "f52f7134dd4e8235",
"hash_cont_tokens": "e23c779c4c2dd1ec"
},
"truncated": 0,
"non_truncated": 1172,
"padded": 4682,
"non_padded": 5,
"effective_few_shots": 25.0,
"num_truncated_few_shots": 0
},
"harness|hellaswag|10": {
"hashes": {
"hash_examples": "e1768ecb99d7ecf0",
"hash_full_prompts": "0b4c16983130f84f",
"hash_input_tokens": "8380af90422a117e",
"hash_cont_tokens": "55da5ba61989a8fe"
},
"truncated": 0,
"non_truncated": 10042,
"padded": 40097,
"non_padded": 71,
"effective_few_shots": 10.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-abstract_algebra|5": {
"hashes": {
"hash_examples": "280f9f325b40559a",
"hash_full_prompts": "2f776a367d23aea2",
"hash_input_tokens": "9185dc38dcc328ea",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-anatomy|5": {
"hashes": {
"hash_examples": "2f83a4f1cab4ba18",
"hash_full_prompts": "516f74bef25df620",
"hash_input_tokens": "90fdbbaaf0213cec",
"hash_cont_tokens": "5cc800feae9fa1ad"
},
"truncated": 0,
"non_truncated": 135,
"padded": 540,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-astronomy|5": {
"hashes": {
"hash_examples": "7d587b908da4d762",
"hash_full_prompts": "faf4e80f65de93ca",
"hash_input_tokens": "cbe1c711494076b6",
"hash_cont_tokens": "655dbb90034f484a"
},
"truncated": 0,
"non_truncated": 152,
"padded": 608,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-business_ethics|5": {
"hashes": {
"hash_examples": "33e51740670de686",
"hash_full_prompts": "db01c3ef8e1479d4",
"hash_input_tokens": "09397035a4a73e5f",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-clinical_knowledge|5": {
"hashes": {
"hash_examples": "f3366dbe7eefffa4",
"hash_full_prompts": "49654f71d94b65c3",
"hash_input_tokens": "90c311de52544438",
"hash_cont_tokens": "f77b74d946d7fc02"
},
"truncated": 0,
"non_truncated": 265,
"padded": 1060,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_biology|5": {
"hashes": {
"hash_examples": "ca2b6753a0193e7f",
"hash_full_prompts": "2b460b75f1fdfefd",
"hash_input_tokens": "d8fd4e3af4ae46c3",
"hash_cont_tokens": "1ba4b1a158d8bf3f"
},
"truncated": 0,
"non_truncated": 144,
"padded": 576,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_chemistry|5": {
"hashes": {
"hash_examples": "22ff85f1d34f42d1",
"hash_full_prompts": "242c9be6da583e95",
"hash_input_tokens": "da514a10083e8e97",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_computer_science|5": {
"hashes": {
"hash_examples": "30318289d717a5cf",
"hash_full_prompts": "ed2bdb4e87c4b371",
"hash_input_tokens": "7ccea65975bb46d4",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_mathematics|5": {
"hashes": {
"hash_examples": "4944d1f0b6b5d911",
"hash_full_prompts": "770bc4281c973190",
"hash_input_tokens": "8ea8585f6adc2650",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_medicine|5": {
"hashes": {
"hash_examples": "dd69cc33381275af",
"hash_full_prompts": "ad2a53e5250ab46e",
"hash_input_tokens": "9d07c6e852253252",
"hash_cont_tokens": "78a0ebf66d91c5cf"
},
"truncated": 0,
"non_truncated": 173,
"padded": 692,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_physics|5": {
"hashes": {
"hash_examples": "875dd26d22655b0d",
"hash_full_prompts": "833a0d7b55aed500",
"hash_input_tokens": "0d3d540477f9eddb",
"hash_cont_tokens": "5a030c95824fdbe5"
},
"truncated": 0,
"non_truncated": 102,
"padded": 408,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-computer_security|5": {
"hashes": {
"hash_examples": "006451eedc0ededb",
"hash_full_prompts": "94034c97e85d8f46",
"hash_input_tokens": "5ebc754afaa1fac8",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-conceptual_physics|5": {
"hashes": {
"hash_examples": "8874ece872d2ca4c",
"hash_full_prompts": "e40d15a34640d6fa",
"hash_input_tokens": "7780b9cde8badacb",
"hash_cont_tokens": "2326dc60d0bc41b6"
},
"truncated": 0,
"non_truncated": 235,
"padded": 940,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-econometrics|5": {
"hashes": {
"hash_examples": "64d3623b0bfaa43f",
"hash_full_prompts": "612f340fae41338d",
"hash_input_tokens": "8acec1576892f7ab",
"hash_cont_tokens": "be908364b6f14dd6"
},
"truncated": 0,
"non_truncated": 114,
"padded": 456,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-electrical_engineering|5": {
"hashes": {
"hash_examples": "e98f51780c674d7e",
"hash_full_prompts": "10275b312d812ae6",
"hash_input_tokens": "e0321889f63f18d7",
"hash_cont_tokens": "179280ef597fe1bf"
},
"truncated": 0,
"non_truncated": 145,
"padded": 564,
"non_padded": 16,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-elementary_mathematics|5": {
"hashes": {
"hash_examples": "fc48208a5ac1c0ce",
"hash_full_prompts": "5ec274c6c82aca23",
"hash_input_tokens": "60e497887b9e2608",
"hash_cont_tokens": "95cdcdaf1abd0bd2"
},
"truncated": 0,
"non_truncated": 378,
"padded": 1512,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-formal_logic|5": {
"hashes": {
"hash_examples": "5a6525665f63ea72",
"hash_full_prompts": "07b92638c4a6b500",
"hash_input_tokens": "53adc0607e358206",
"hash_cont_tokens": "6a4818f3c307c346"
},
"truncated": 0,
"non_truncated": 126,
"padded": 504,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-global_facts|5": {
"hashes": {
"hash_examples": "371d70d743b2b89b",
"hash_full_prompts": "332fdee50a1921b4",
"hash_input_tokens": "34682f752c1a1ac4",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_biology|5": {
"hashes": {
"hash_examples": "a79e1018b1674052",
"hash_full_prompts": "e624e26ede922561",
"hash_input_tokens": "bb5cc287970e5c14",
"hash_cont_tokens": "36d0d84455f0bdba"
},
"truncated": 0,
"non_truncated": 310,
"padded": 1240,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_chemistry|5": {
"hashes": {
"hash_examples": "44bfc25c389f0e03",
"hash_full_prompts": "0e3e5f5d9246482a",
"hash_input_tokens": "b12197fdbc9a45f0",
"hash_cont_tokens": "c678f794a9b8ee74"
},
"truncated": 0,
"non_truncated": 203,
"padded": 812,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_computer_science|5": {
"hashes": {
"hash_examples": "8b8cdb1084f24169",
"hash_full_prompts": "c00487e67c1813cc",
"hash_input_tokens": "36408b638d9d7a8d",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_european_history|5": {
"hashes": {
"hash_examples": "11cd32d0ef440171",
"hash_full_prompts": "318f4513c537c6bf",
"hash_input_tokens": "652bd20e505a2826",
"hash_cont_tokens": "e9c94304326d875c"
},
"truncated": 0,
"non_truncated": 165,
"padded": 656,
"non_padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_geography|5": {
"hashes": {
"hash_examples": "b60019b9e80b642f",
"hash_full_prompts": "ee5789fcc1a81b1e",
"hash_input_tokens": "8f4cd01faf05c6f1",
"hash_cont_tokens": "f937a1349eb483eb"
},
"truncated": 0,
"non_truncated": 198,
"padded": 792,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_government_and_politics|5": {
"hashes": {
"hash_examples": "d221ec983d143dc3",
"hash_full_prompts": "ac42d888e1ce1155",
"hash_input_tokens": "217861435fcb5576",
"hash_cont_tokens": "8b27dd3907d25b4e"
},
"truncated": 0,
"non_truncated": 193,
"padded": 772,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_macroeconomics|5": {
"hashes": {
"hash_examples": "59c2915cacfd3fbb",
"hash_full_prompts": "c6bd9d25158abd0e",
"hash_input_tokens": "bcedb3cf953f812f",
"hash_cont_tokens": "3763cae29e2f938c"
},
"truncated": 0,
"non_truncated": 390,
"padded": 1560,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_mathematics|5": {
"hashes": {
"hash_examples": "1f8ac897608de342",
"hash_full_prompts": "5d88f41fc2d643a8",
"hash_input_tokens": "52affce916d66c97",
"hash_cont_tokens": "fd7b555352d765a4"
},
"truncated": 0,
"non_truncated": 270,
"padded": 1080,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_microeconomics|5": {
"hashes": {
"hash_examples": "ead6a0f2f6c83370",
"hash_full_prompts": "bfc393381298609e",
"hash_input_tokens": "b9d29201856d353d",
"hash_cont_tokens": "61f46d4a209b9aa2"
},
"truncated": 0,
"non_truncated": 238,
"padded": 952,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_physics|5": {
"hashes": {
"hash_examples": "c3f2025990afec64",
"hash_full_prompts": "fc78b4997e436734",
"hash_input_tokens": "9c27af329cb41097",
"hash_cont_tokens": "4e7053e7c19d680d"
},
"truncated": 0,
"non_truncated": 151,
"padded": 604,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_psychology|5": {
"hashes": {
"hash_examples": "21f8aab618f6d636",
"hash_full_prompts": "d5c76aa40b9dbc43",
"hash_input_tokens": "192aef17a8956826",
"hash_cont_tokens": "84d19ae8790476bb"
},
"truncated": 0,
"non_truncated": 545,
"padded": 2180,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_statistics|5": {
"hashes": {
"hash_examples": "2386a60a11fc5de3",
"hash_full_prompts": "4c5c8be5aafac432",
"hash_input_tokens": "a9bc6c02c6f83983",
"hash_cont_tokens": "b119c7b668213a4e"
},
"truncated": 0,
"non_truncated": 216,
"padded": 864,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_us_history|5": {
"hashes": {
"hash_examples": "74961543be40f04f",
"hash_full_prompts": "5d5ca4840131ba21",
"hash_input_tokens": "14741fa2bd2a4414",
"hash_cont_tokens": "a3b126bc622d571f"
},
"truncated": 0,
"non_truncated": 204,
"padded": 816,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_world_history|5": {
"hashes": {
"hash_examples": "2ad2f6b7198b2234",
"hash_full_prompts": "11845057459afd72",
"hash_input_tokens": "67f306eb2bf3d2cb",
"hash_cont_tokens": "9abf19ceb76331ff"
},
"truncated": 0,
"non_truncated": 237,
"padded": 948,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-human_aging|5": {
"hashes": {
"hash_examples": "1a7199dc733e779b",
"hash_full_prompts": "756b9096b8eaf892",
"hash_input_tokens": "e5cc30c46358588f",
"hash_cont_tokens": "0e2e725ae9a898da"
},
"truncated": 0,
"non_truncated": 223,
"padded": 892,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-human_sexuality|5": {
"hashes": {
"hash_examples": "7acb8fdad97f88a6",
"hash_full_prompts": "731a52ff15b8cfdb",
"hash_input_tokens": "10a6536adeac8632",
"hash_cont_tokens": "a94c1dea6d775249"
},
"truncated": 0,
"non_truncated": 131,
"padded": 524,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-international_law|5": {
"hashes": {
"hash_examples": "1300bfd0dfc59114",
"hash_full_prompts": "db2aefbff5eec996",
"hash_input_tokens": "d9015aba41ce0d5c",
"hash_cont_tokens": "3832f860859bb86b"
},
"truncated": 0,
"non_truncated": 121,
"padded": 484,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-jurisprudence|5": {
"hashes": {
"hash_examples": "083b1e4904c48dc2",
"hash_full_prompts": "0f89ee3fe03d6a21",
"hash_input_tokens": "d5f2109de63c3402",
"hash_cont_tokens": "9fac5a0c364fca8a"
},
"truncated": 0,
"non_truncated": 108,
"padded": 432,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-logical_fallacies|5": {
"hashes": {
"hash_examples": "709128f9926a634c",
"hash_full_prompts": "98a04b1f8f841069",
"hash_input_tokens": "e0b39eb7c9788cfe",
"hash_cont_tokens": "dc53ed31134ddf3a"
},
"truncated": 0,
"non_truncated": 163,
"padded": 644,
"non_padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-machine_learning|5": {
"hashes": {
"hash_examples": "88f22a636029ae47",
"hash_full_prompts": "2e1c8d4b1e0cc921",
"hash_input_tokens": "643a872ad0f99bb0",
"hash_cont_tokens": "e272b5456d5552d6"
},
"truncated": 0,
"non_truncated": 112,
"padded": 448,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-management|5": {
"hashes": {
"hash_examples": "8c8a1e07a2151dca",
"hash_full_prompts": "f51611f514b265b0",
"hash_input_tokens": "1232c5b0f524b151",
"hash_cont_tokens": "7119d4642957b1f0"
},
"truncated": 0,
"non_truncated": 103,
"padded": 412,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-marketing|5": {
"hashes": {
"hash_examples": "2668953431f91e96",
"hash_full_prompts": "77562bef997c7650",
"hash_input_tokens": "f1d76d4a1e08e901",
"hash_cont_tokens": "099d58c66ece3f11"
},
"truncated": 0,
"non_truncated": 234,
"padded": 936,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-medical_genetics|5": {
"hashes": {
"hash_examples": "9c2dda34a2ea4fd2",
"hash_full_prompts": "202139046daa118f",
"hash_input_tokens": "cd181ff20fe83b83",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-miscellaneous|5": {
"hashes": {
"hash_examples": "41adb694024809c2",
"hash_full_prompts": "bffec9fc237bcf93",
"hash_input_tokens": "a3d90d10e2efc569",
"hash_cont_tokens": "bae342d4e82ba8f7"
},
"truncated": 0,
"non_truncated": 783,
"padded": 3132,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-moral_disputes|5": {
"hashes": {
"hash_examples": "3171c13ba3c594c4",
"hash_full_prompts": "170831fc36f1d59e",
"hash_input_tokens": "4b35576715cc147a",
"hash_cont_tokens": "578c64cbdbb1e0d4"
},
"truncated": 0,
"non_truncated": 346,
"padded": 1384,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-moral_scenarios|5": {
"hashes": {
"hash_examples": "9873e077e83e0546",
"hash_full_prompts": "08f4ceba3131a068",
"hash_input_tokens": "1b93703ae85294ee",
"hash_cont_tokens": "79b25f42b3fce0f9"
},
"truncated": 0,
"non_truncated": 895,
"padded": 3580,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-nutrition|5": {
"hashes": {
"hash_examples": "7db1d8142ec14323",
"hash_full_prompts": "4c0e68e3586cb453",
"hash_input_tokens": "6741a26253bd4258",
"hash_cont_tokens": "9d1f3b976417156c"
},
"truncated": 0,
"non_truncated": 306,
"padded": 1224,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-philosophy|5": {
"hashes": {
"hash_examples": "9b455b7d72811cc8",
"hash_full_prompts": "e467f822d8a0d3ff",
"hash_input_tokens": "730a52e273f8fcf5",
"hash_cont_tokens": "88dab560e1e06d97"
},
"truncated": 0,
"non_truncated": 311,
"padded": 1244,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-prehistory|5": {
"hashes": {
"hash_examples": "8be90d0f538f1560",
"hash_full_prompts": "152187949bcd0921",
"hash_input_tokens": "9e211e939e14b414",
"hash_cont_tokens": "04ea847139fe9393"
},
"truncated": 0,
"non_truncated": 324,
"padded": 1296,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_accounting|5": {
"hashes": {
"hash_examples": "8d377597916cd07e",
"hash_full_prompts": "0eb7345d6144ee0d",
"hash_input_tokens": "d5761e6be99ed835",
"hash_cont_tokens": "0435ff692ad17e68"
},
"truncated": 0,
"non_truncated": 282,
"padded": 1124,
"non_padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_law|5": {
"hashes": {
"hash_examples": "cd9dbc52b3c932d6",
"hash_full_prompts": "36ac764272bfb182",
"hash_input_tokens": "fcbc59834dbaa06c",
"hash_cont_tokens": "b852c74e9f8801bd"
},
"truncated": 0,
"non_truncated": 1534,
"padded": 6136,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_medicine|5": {
"hashes": {
"hash_examples": "b20e4e816c1e383e",
"hash_full_prompts": "7b8d69ea2acaf2f7",
"hash_input_tokens": "ba5999ee85a41b08",
"hash_cont_tokens": "5db0f6460652d063"
},
"truncated": 0,
"non_truncated": 272,
"padded": 1088,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_psychology|5": {
"hashes": {
"hash_examples": "d45b73b22f9cc039",
"hash_full_prompts": "fe8937e9ffc99771",
"hash_input_tokens": "35652463c3b2d9c6",
"hash_cont_tokens": "c960676ef7f3dbe5"
},
"truncated": 0,
"non_truncated": 612,
"padded": 2448,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-public_relations|5": {
"hashes": {
"hash_examples": "0d25072e1761652a",
"hash_full_prompts": "f9adc39cfa9f42ba",
"hash_input_tokens": "af501bc2c58d000f",
"hash_cont_tokens": "3320565f412c4b01"
},
"truncated": 0,
"non_truncated": 110,
"padded": 440,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-security_studies|5": {
"hashes": {
"hash_examples": "62bb8197e63d60d4",
"hash_full_prompts": "869c9c3ae196b7c3",
"hash_input_tokens": "5df7af45226ffc3a",
"hash_cont_tokens": "218ed775ef60aab9"
},
"truncated": 0,
"non_truncated": 245,
"padded": 980,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-sociology|5": {
"hashes": {
"hash_examples": "e7959df87dea8672",
"hash_full_prompts": "1a1fc00e17b3a52a",
"hash_input_tokens": "5dc2e3734f4dd402",
"hash_cont_tokens": "20babf5cc4cc7f3d"
},
"truncated": 0,
"non_truncated": 201,
"padded": 804,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-us_foreign_policy|5": {
"hashes": {
"hash_examples": "4a56a01ddca44dca",
"hash_full_prompts": "0c7a7081c71c07b6",
"hash_input_tokens": "ed972b660c40d1e4",
"hash_cont_tokens": "bcc22fd85dcc85e9"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-virology|5": {
"hashes": {
"hash_examples": "451cc86a8c4f4fe9",
"hash_full_prompts": "01e95325d8b738e4",
"hash_input_tokens": "ed703c55cc114c98",
"hash_cont_tokens": "dc6d57296bea0882"
},
"truncated": 0,
"non_truncated": 166,
"padded": 664,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-world_religions|5": {
"hashes": {
"hash_examples": "3b29cfaf1a81c379",
"hash_full_prompts": "e0d79a15083dfdff",
"hash_input_tokens": "00cf9f5943b1480b",
"hash_cont_tokens": "37f53444db289ed3"
},
"truncated": 0,
"non_truncated": 171,
"padded": 684,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|truthfulqa:mc|0": {
"hashes": {
"hash_examples": "23176c0531c7b867",
"hash_full_prompts": "36a6d90e75d92d4a",
"hash_input_tokens": "5e931dfc6ab75011",
"hash_cont_tokens": "71a67034827cd30e"
},
"truncated": 0,
"non_truncated": 817,
"padded": 9996,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"harness|winogrande|5": {
"hashes": {
"hash_examples": "aada0a176fd81218",
"hash_full_prompts": "c8655cbd12de8409",
"hash_input_tokens": "bd055e8ba456ab4a",
"hash_cont_tokens": "c93e9c22fa3077a0"
},
"truncated": 0,
"non_truncated": 1267,
"padded": 2534,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|gsm8k|5": {
"hashes": {
"hash_examples": "4c0843a5d99bcfdc",
"hash_full_prompts": "41d55e83abc0e02d",
"hash_input_tokens": "5cae6c4034435931",
"hash_cont_tokens": "f9475f22afa2fdc5"
},
"truncated": 0,
"non_truncated": 1319,
"padded": 0,
"non_padded": 1319,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "3b7fa57a057f9415",
"hash_full_prompts": "63615fc50fc9417c",
"hash_input_tokens": "2f7ca631fba4ce39",
"hash_cont_tokens": "252cc31b34422063"
},
"truncated": 0,
"non_truncated": 28659,
"padded": 113445,
"non_padded": 1427,
"num_truncated_few_shots": 0
}
}
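
The file above is the raw per-task results dump produced by the Open LLM Leaderboard evaluation pipeline (lighteval) for JosephusCheung/Yee-34B-200K-Chat. A minimal Python sketch for working with it follows, assuming the JSON has been downloaded locally under its original filename; the aggregation shown is an approximation of how the leaderboard summarises the headline benchmarks (acc_norm for ARC and HellaSwag, acc averaged over the hendrycksTest subtasks for MMLU, mc2 for TruthfulQA).

import json
from statistics import mean

# Hypothetical local copy of the results file shown above.
with open("results_2023-12-05T04-15-54.776905.json") as f:
    data = json.load(f)

results = data["results"]

scores = {
    "ARC (25-shot)": results["harness|arc:challenge|25"]["acc_norm"],
    "HellaSwag (10-shot)": results["harness|hellaswag|10"]["acc_norm"],
    # MMLU is reported as the mean over the 57 hendrycksTest subtasks.
    "MMLU (5-shot)": mean(
        v["acc"] for k, v in results.items()
        if k.startswith("harness|hendrycksTest-")
    ),
    "TruthfulQA (0-shot)": results["harness|truthfulqa:mc|0"]["mc2"],
    "Winogrande (5-shot)": results["harness|winogrande|5"]["acc"],
    "GSM8K (5-shot)": results["harness|gsm8k|5"]["acc"],
}

for name, score in scores.items():
    print(f"{name:<22}{100 * score:.2f}")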