hf (pretrained=/apdcephfs_qy3/share_301069248/users/rummyyang/LLaMA-Factory/saves/llama3-1b/lora/pretrain/sft/checkpoint-4500), gen_kwargs: (None), limit: None, num_fewshot: None, batch_size: 128

| Tasks |Version| Filter |n-shot| Metric | |Value | |Stderr|
|----------------------------------------------------|-------|----------------|-----:|-----------|---|-----:|---|-----:|
|arc_challenge | 1|none | 0|acc |↑ |0.2048|± |0.0118|
| | |none | 0|acc_norm |↑ |0.2372|± |0.0124|
|arc_easy | 1|none | 0|acc |↑ |0.4407|± |0.0102|
| | |none | 0|acc_norm |↑ |0.4007|± |0.0101|
|ceval-valid |N/A |none | 0|acc |↑ |0.2623|± |0.0120|
|ceval-valid_accountant | 1|none | 0|acc |↑ |0.2449|± |0.0621|
|ceval-valid_advanced_mathematics | 1|none | 0|acc |↑ |0.2105|± |0.0961|
|ceval-valid_art_studies | 1|none | 0|acc |↑ |0.1515|± |0.0634|
|ceval-valid_basic_medicine | 1|none | 0|acc |↑ |0.3684|± |0.1137|
|ceval-valid_business_administration | 1|none | 0|acc |↑ |0.2727|± |0.0787|
|ceval-valid_chinese_language_and_literature | 1|none | 0|acc |↑ |0.1304|± |0.0718|
|ceval-valid_civil_servant | 1|none | 0|acc |↑ |0.1702|± |0.0554|
|ceval-valid_clinical_medicine | 1|none | 0|acc |↑ |0.2273|± |0.0914|
|ceval-valid_college_chemistry | 1|none | 0|acc |↑ |0.2500|± |0.0903|
|ceval-valid_college_economics | 1|none | 0|acc |↑ |0.3455|± |0.0647|
|ceval-valid_college_physics | 1|none | 0|acc |↑ |0.2632|± |0.1038|
|ceval-valid_college_programming | 1|none | 0|acc |↑ |0.2973|± |0.0762|
|ceval-valid_computer_architecture | 1|none | 0|acc |↑ |0.2857|± |0.1010|
|ceval-valid_computer_network | 1|none | 0|acc |↑ |0.4737|± |0.1177|
|ceval-valid_discrete_mathematics | 1|none | 0|acc |↑ |0.2500|± |0.1118|
|ceval-valid_education_science | 1|none | 0|acc |↑ |0.3448|± |0.0898|
|ceval-valid_electrical_engineer | 1|none | 0|acc |↑ |0.2973|± |0.0762|
|ceval-valid_environmental_impact_assessment_engineer| 1|none | 0|acc |↑ |0.2903|± |0.0829|
|ceval-valid_fire_engineer | 1|none | 0|acc |↑ |0.3226|± |0.0853|
|ceval-valid_high_school_biology | 1|none | 0|acc |↑ |0.2632|± |0.1038|
|ceval-valid_high_school_chemistry | 1|none | 0|acc |↑ |0.2632|± |0.1038|
|ceval-valid_high_school_chinese | 1|none | 0|acc |↑ |0.1579|± |0.0859|
|ceval-valid_high_school_geography | 1|none | 0|acc |↑ |0.2632|± |0.1038|
|ceval-valid_high_school_history | 1|none | 0|acc |↑ |0.5500|± |0.1141|
|ceval-valid_high_school_mathematics | 1|none | 0|acc |↑ |0.2222|± |0.1008|
|ceval-valid_high_school_physics | 1|none | 0|acc |↑ |0.2105|± |0.0961|
|ceval-valid_high_school_politics | 1|none | 0|acc |↑ |0.1579|± |0.0859|
|ceval-valid_ideological_and_moral_cultivation | 1|none | 0|acc |↑ |0.3684|± |0.1137|
|ceval-valid_law | 1|none | 0|acc |↑ |0.1667|± |0.0777|
|ceval-valid_legal_professional | 1|none | 0|acc |↑ |0.2174|± |0.0879|
|ceval-valid_logic | 1|none | 0|acc |↑ |0.1818|± |0.0842|
|ceval-valid_mao_zedong_thought | 1|none | 0|acc |↑ |0.2500|± |0.0903|
|ceval-valid_marxism | 1|none | 0|acc |↑ |0.3684|± |0.1137|
|ceval-valid_metrology_engineer | 1|none | 0|acc |↑ |0.1667|± |0.0777|
|ceval-valid_middle_school_biology | 1|none | 0|acc |↑ |0.0952|± |0.0656|
|ceval-valid_middle_school_chemistry | 1|none | 0|acc |↑ |0.3000|± |0.1051|
|ceval-valid_middle_school_geography | 1|none | 0|acc |↑ |0.2500|± |0.1306|
|ceval-valid_middle_school_history | 1|none | 0|acc |↑ |0.0455|± |0.0455|
|ceval-valid_middle_school_mathematics | 1|none | 0|acc |↑ |0.2105|± |0.0961|
|ceval-valid_middle_school_physics | 1|none | 0|acc |↑ |0.2632|± |0.1038|
|ceval-valid_middle_school_politics | 1|none | 0|acc |↑ |0.2381|± |0.0952|
|ceval-valid_modern_chinese_history | 1|none | 0|acc |↑ |0.1739|± |0.0808|
|ceval-valid_operating_system | 1|none | 0|acc |↑ |0.3158|± |0.1096|
|ceval-valid_physician | 1|none | 0|acc |↑ |0.2653|± |0.0637|
|ceval-valid_plant_protection | 1|none | 0|acc |↑ |0.2273|± |0.0914|
|ceval-valid_probability_and_statistics | 1|none | 0|acc |↑ |0.3889|± |0.1182|
|ceval-valid_professional_tour_guide | 1|none | 0|acc |↑ |0.2759|± |0.0845|
|ceval-valid_sports_science | 1|none | 0|acc |↑ |0.2105|± |0.0961|
|ceval-valid_tax_accountant | 1|none | 0|acc |↑ |0.3265|± |0.0677|
|ceval-valid_teacher_qualification | 1|none | 0|acc |↑ |0.2955|± |0.0696|
|ceval-valid_urban_and_rural_planner | 1|none | 0|acc |↑ |0.3043|± |0.0686|
|ceval-valid_veterinary_medicine | 1|none | 0|acc |↑ |0.3043|± |0.0981|
|cmmlu |N/A |none | 0|acc |↑ |0.2475|± |0.0040|
| | |none | 0|acc_norm |↑ |0.2475|± |0.0040|
|cmmlu_agronomy | 0|none | 0|acc |↑ |0.2544|± |0.0336|
| | |none | 0|acc_norm |↑ |0.2544|± |0.0336|
|cmmlu_anatomy | 0|none | 0|acc |↑ |0.2500|± |0.0357|
| | |none | 0|acc_norm |↑ |0.2500|± |0.0357|
|cmmlu_ancient_chinese | 0|none | 0|acc |↑ |0.2134|± |0.0321|
| | |none | 0|acc_norm |↑ |0.2134|± |0.0321|
|cmmlu_arts | 0|none | 0|acc |↑ |0.2375|± |0.0337|
| | |none | 0|acc_norm |↑ |0.2375|± |0.0337|
|cmmlu_astronomy | 0|none | 0|acc |↑ |0.2424|± |0.0335|
| | |none | 0|acc_norm |↑ |0.2424|± |0.0335|
|cmmlu_business_ethics | 0|none | 0|acc |↑ |0.2344|± |0.0294|
| | |none | 0|acc_norm |↑ |0.2344|± |0.0294|
|cmmlu_chinese_civil_service_exam | 0|none | 0|acc |↑ |0.2500|± |0.0343|
| | |none | 0|acc_norm |↑ |0.2500|± |0.0343|
|cmmlu_chinese_driving_rule | 0|none | 0|acc |↑ |0.2366|± |0.0373|
| | |none | 0|acc_norm |↑ |0.2366|± |0.0373|
|cmmlu_chinese_food_culture | 0|none | 0|acc |↑ |0.2353|± |0.0365|
| | |none | 0|acc_norm |↑ |0.2353|± |0.0365|
|cmmlu_chinese_foreign_policy | 0|none | 0|acc |↑ |0.2430|± |0.0417|
| | |none | 0|acc_norm |↑ |0.2430|± |0.0417|
|cmmlu_chinese_history | 0|none | 0|acc |↑ |0.2508|± |0.0242|
| | |none | 0|acc_norm |↑ |0.2508|± |0.0242|
|cmmlu_chinese_literature | 0|none | 0|acc |↑ |0.2353|± |0.0298|
| | |none | 0|acc_norm |↑ |0.2353|± |0.0298|
|cmmlu_chinese_teacher_qualification | 0|none | 0|acc |↑ |0.2235|± |0.0312|
| | |none | 0|acc_norm |↑ |0.2235|± |0.0312|
|cmmlu_clinical_knowledge | 0|none | 0|acc |↑ |0.2278|± |0.0273|
| | |none | 0|acc_norm |↑ |0.2278|± |0.0273|
|cmmlu_college_actuarial_science | 0|none | 0|acc |↑ |0.2170|± |0.0402|
| | |none | 0|acc_norm |↑ |0.2170|± |0.0402|
|cmmlu_college_education | 0|none | 0|acc |↑ |0.3271|± |0.0456|
| | |none | 0|acc_norm |↑ |0.3271|± |0.0456|
|cmmlu_college_engineering_hydrology | 0|none | 0|acc |↑ |0.2642|± |0.0430|
| | |none | 0|acc_norm |↑ |0.2642|± |0.0430|
|cmmlu_college_law | 0|none | 0|acc |↑ |0.2222|± |0.0402|
| | |none | 0|acc_norm |↑ |0.2222|± |0.0402|
|cmmlu_college_mathematics | 0|none | 0|acc |↑ |0.2095|± |0.0399|
| | |none | 0|acc_norm |↑ |0.2095|± |0.0399|
|cmmlu_college_medical_statistics | 0|none | 0|acc |↑ |0.2547|± |0.0425|
| | |none | 0|acc_norm |↑ |0.2547|± |0.0425|
|cmmlu_college_medicine | 0|none | 0|acc |↑ |0.2784|± |0.0272|
| | |none | 0|acc_norm |↑ |0.2784|± |0.0272|
|cmmlu_computer_science | 0|none | 0|acc |↑ |0.2157|± |0.0289|
| | |none | 0|acc_norm |↑ |0.2157|± |0.0289|
|cmmlu_computer_security | 0|none | 0|acc |↑ |0.2632|± |0.0338|
| | |none | 0|acc_norm |↑ |0.2632|± |0.0338|
|cmmlu_conceptual_physics | 0|none | 0|acc |↑ |0.2653|± |0.0365|
| | |none | 0|acc_norm |↑ |0.2653|± |0.0365|
|cmmlu_construction_project_management | 0|none | 0|acc |↑ |0.2446|± |0.0366|
| | |none | 0|acc_norm |↑ |0.2446|± |0.0366|
|cmmlu_economics | 0|none | 0|acc |↑ |0.2579|± |0.0348|
| | |none | 0|acc_norm |↑ |0.2579|± |0.0348|
|cmmlu_education | 0|none | 0|acc |↑ |0.2270|± |0.0329|
| | |none | 0|acc_norm |↑ |0.2270|± |0.0329|
|cmmlu_electrical_engineering | 0|none | 0|acc |↑ |0.2500|± |0.0331|
| | |none | 0|acc_norm |↑ |0.2500|± |0.0331|
|cmmlu_elementary_chinese | 0|none | 0|acc |↑ |0.2341|± |0.0267|
| | |none | 0|acc_norm |↑ |0.2341|± |0.0267|
|cmmlu_elementary_commonsense | 0|none | 0|acc |↑ |0.2626|± |0.0314|
| | |none | 0|acc_norm |↑ |0.2626|± |0.0314|
|cmmlu_elementary_information_and_technology | 0|none | 0|acc |↑ |0.2479|± |0.0280|
| | |none | 0|acc_norm |↑ |0.2479|± |0.0280|
|cmmlu_elementary_mathematics | 0|none | 0|acc |↑ |0.2957|± |0.0302|
| | |none | 0|acc_norm |↑ |0.2957|± |0.0302|
|cmmlu_ethnology | 0|none | 0|acc |↑ |0.2963|± |0.0394|
| | |none | 0|acc_norm |↑ |0.2963|± |0.0394|
|cmmlu_food_science | 0|none | 0|acc |↑ |0.2587|± |0.0368|
| | |none | 0|acc_norm |↑ |0.2587|± |0.0368|
|cmmlu_genetics | 0|none | 0|acc |↑ |0.2386|± |0.0322|
| | |none | 0|acc_norm |↑ |0.2386|± |0.0322|
|cmmlu_global_facts | 0|none | 0|acc |↑ |0.2752|± |0.0367|
| | |none | 0|acc_norm |↑ |0.2752|± |0.0367|
|cmmlu_high_school_biology | 0|none | 0|acc |↑ |0.2249|± |0.0322|
| | |none | 0|acc_norm |↑ |0.2249|± |0.0322|
|cmmlu_high_school_chemistry | 0|none | 0|acc |↑ |0.2652|± |0.0386|
| | |none | 0|acc_norm |↑ |0.2652|± |0.0386|
|cmmlu_high_school_geography | 0|none | 0|acc |↑ |0.2288|± |0.0388|
| | |none | 0|acc_norm |↑ |0.2288|± |0.0388|
|cmmlu_high_school_mathematics | 0|none | 0|acc |↑ |0.2561|± |0.0342|
| | |none | 0|acc_norm |↑ |0.2561|± |0.0342|
|cmmlu_high_school_physics | 0|none | 0|acc |↑ |0.1636|± |0.0354|
| | |none | 0|acc_norm |↑ |0.1636|± |0.0354|
|cmmlu_high_school_politics | 0|none | 0|acc |↑ |0.2378|± |0.0357|
| | |none | 0|acc_norm |↑ |0.2378|± |0.0357|
|cmmlu_human_sexuality | 0|none | 0|acc |↑ |0.2222|± |0.0372|
| | |none | 0|acc_norm |↑ |0.2222|± |0.0372|
|cmmlu_international_law | 0|none | 0|acc |↑ |0.2432|± |0.0316|
| | |none | 0|acc_norm |↑ |0.2432|± |0.0316|
|cmmlu_journalism | 0|none | 0|acc |↑ |0.2674|± |0.0338|
| | |none | 0|acc_norm |↑ |0.2674|± |0.0338|
|cmmlu_jurisprudence | 0|none | 0|acc |↑ |0.2482|± |0.0213|
| | |none | 0|acc_norm |↑ |0.2482|± |0.0213|
|cmmlu_legal_and_moral_basis | 0|none | 0|acc |↑ |0.2617|± |0.0301|
| | |none | 0|acc_norm |↑ |0.2617|± |0.0301|
|cmmlu_logical | 0|none | 0|acc |↑ |0.2033|± |0.0364|
| | |none | 0|acc_norm |↑ |0.2033|± |0.0364|
|cmmlu_machine_learning | 0|none | 0|acc |↑ |0.3279|± |0.0427|
| | |none | 0|acc_norm |↑ |0.3279|± |0.0427|
|cmmlu_management | 0|none | 0|acc |↑ |0.2190|± |0.0286|
| | |none | 0|acc_norm |↑ |0.2190|± |0.0286|
|cmmlu_marketing | 0|none | 0|acc |↑ |0.2056|± |0.0302|
| | |none | 0|acc_norm |↑ |0.2056|± |0.0302|
|cmmlu_marxist_theory | 0|none | 0|acc |↑ |0.2540|± |0.0317|
| | |none | 0|acc_norm |↑ |0.2540|± |0.0317|
|cmmlu_modern_chinese | 0|none | 0|acc |↑ |0.2241|± |0.0389|
| | |none | 0|acc_norm |↑ |0.2241|± |0.0389|
|cmmlu_nutrition | 0|none | 0|acc |↑ |0.2483|± |0.0360|
| | |none | 0|acc_norm |↑ |0.2483|± |0.0360|
|cmmlu_philosophy | 0|none | 0|acc |↑ |0.2571|± |0.0429|
| | |none | 0|acc_norm |↑ |0.2571|± |0.0429|
|cmmlu_professional_accounting | 0|none | 0|acc |↑ |0.2914|± |0.0344|
| | |none | 0|acc_norm |↑ |0.2914|± |0.0344|
|cmmlu_professional_law | 0|none | 0|acc |↑ |0.2038|± |0.0278|
| | |none | 0|acc_norm |↑ |0.2038|± |0.0278|
|cmmlu_professional_medicine | 0|none | 0|acc |↑ |0.2527|± |0.0224|
| | |none | 0|acc_norm |↑ |0.2527|± |0.0224|
|cmmlu_professional_psychology | 0|none | 0|acc |↑ |0.2586|± |0.0288|
| | |none | 0|acc_norm |↑ |0.2586|± |0.0288|
|cmmlu_public_relations | 0|none | 0|acc |↑ |0.2644|± |0.0335|
| | |none | 0|acc_norm |↑ |0.2644|± |0.0335|
|cmmlu_security_study | 0|none | 0|acc |↑ |0.2741|± |0.0385|
| | |none | 0|acc_norm |↑ |0.2741|± |0.0385|
|cmmlu_sociology | 0|none | 0|acc |↑ |0.2743|± |0.0297|
| | |none | 0|acc_norm |↑ |0.2743|± |0.0297|
|cmmlu_sports_science | 0|none | 0|acc |↑ |0.2545|± |0.0340|
| | |none | 0|acc_norm |↑ |0.2545|± |0.0340|
|cmmlu_traditional_chinese_medicine | 0|none | 0|acc |↑ |0.2541|± |0.0321|
| | |none | 0|acc_norm |↑ |0.2541|± |0.0321|
|cmmlu_virology | 0|none | 0|acc |↑ |0.2485|± |0.0333|
| | |none | 0|acc_norm |↑ |0.2485|± |0.0333|
|cmmlu_world_history | 0|none | 0|acc |↑ |0.2484|± |0.0342|
| | |none | 0|acc_norm |↑ |0.2484|± |0.0342|
|cmmlu_world_religions | 0|none | 0|acc |↑ |0.2250|± |0.0331|
| | |none | 0|acc_norm |↑ |0.2250|± |0.0331|
|gsm8k_cot | 3|flexible-extract| 8|exact_match|↑ |0.0152|± |0.0034|
| | |strict-match | 8|exact_match|↑ |0.0061|± |0.0021|
|hellaswag | 1|none | 0|acc |↑ |0.2996|± |0.0046|
| | |none | 0|acc_norm |↑ |0.3276|± |0.0047|
|mmlu |N/A |none | 0|acc |↑ |0.2479|± |0.0036|
|mmlu_abstract_algebra | 0|none | 0|acc |↑ |0.2600|± |0.0441|
|mmlu_anatomy | 0|none | 0|acc |↑ |0.2741|± |0.0385|
|mmlu_astronomy | 0|none | 0|acc |↑ |0.2105|± |0.0332|
|mmlu_business_ethics | 0|none | 0|acc |↑ |0.2600|± |0.0441|
|mmlu_clinical_knowledge | 0|none | 0|acc |↑ |0.2679|± |0.0273|
|mmlu_college_biology | 0|none | 0|acc |↑ |0.2292|± |0.0351|
|mmlu_college_chemistry | 0|none | 0|acc |↑ |0.2600|± |0.0441|
|mmlu_college_computer_science | 0|none | 0|acc |↑ |0.1800|± |0.0386|
|mmlu_college_mathematics | 0|none | 0|acc |↑ |0.2800|± |0.0451|
|mmlu_college_medicine | 0|none | 0|acc |↑ |0.2197|± |0.0316|
|mmlu_college_physics | 0|none | 0|acc |↑ |0.2353|± |0.0422|
|mmlu_computer_security | 0|none | 0|acc |↑ |0.2500|± |0.0435|
|mmlu_conceptual_physics | 0|none | 0|acc |↑ |0.3404|± |0.0310|
|mmlu_econometrics | 0|none | 0|acc |↑ |0.2632|± |0.0414|
|mmlu_electrical_engineering | 0|none | 0|acc |↑ |0.1931|± |0.0329|
|mmlu_elementary_mathematics | 0|none | 0|acc |↑ |0.2672|± |0.0228|
|mmlu_formal_logic | 0|none | 0|acc |↑ |0.2381|± |0.0381|
|mmlu_global_facts | 0|none | 0|acc |↑ |0.2000|± |0.0402|
|mmlu_high_school_biology | 0|none | 0|acc |↑ |0.2613|± |0.0250|
|mmlu_high_school_chemistry | 0|none | 0|acc |↑ |0.2217|± |0.0292|
|mmlu_high_school_computer_science | 0|none | 0|acc |↑ |0.2700|± |0.0446|
|mmlu_high_school_european_history | 0|none | 0|acc |↑ |0.2667|± |0.0345|
|mmlu_high_school_geography | 0|none | 0|acc |↑ |0.2020|± |0.0286|
|mmlu_high_school_government_and_politics | 0|none | 0|acc |↑ |0.2332|± |0.0305|
|mmlu_high_school_macroeconomics | 0|none | 0|acc |↑ |0.2385|± |0.0216|
|mmlu_high_school_mathematics | 0|none | 0|acc |↑ |0.2556|± |0.0266|
|mmlu_high_school_microeconomics | 0|none | 0|acc |↑ |0.2353|± |0.0276|
|mmlu_high_school_physics | 0|none | 0|acc |↑ |0.2185|± |0.0337|
|mmlu_high_school_psychology | 0|none | 0|acc |↑ |0.2257|± |0.0179|
|mmlu_high_school_statistics | 0|none | 0|acc |↑ |0.1667|± |0.0254|
|mmlu_high_school_us_history | 0|none | 0|acc |↑ |0.2794|± |0.0315|
|mmlu_high_school_world_history | 0|none | 0|acc |↑ |0.2405|± |0.0278|
|mmlu_human_aging | 0|none | 0|acc |↑ |0.3632|± |0.0323|
|mmlu_human_sexuality | 0|none | 0|acc |↑ |0.2443|± |0.0377|
|mmlu_humanities |N/A |none | 0|acc |↑ |0.2497|± |0.0063|
|mmlu_international_law | 0|none | 0|acc |↑ |0.2479|± |0.0394|
|mmlu_jurisprudence | 0|none | 0|acc |↑ |0.3056|± |0.0445|
|mmlu_logical_fallacies | 0|none | 0|acc |↑ |0.2454|± |0.0338|
|mmlu_machine_learning | 0|none | 0|acc |↑ |0.2768|± |0.0425|
|mmlu_management | 0|none | 0|acc |↑ |0.2621|± |0.0435|
|mmlu_marketing | 0|none | 0|acc |↑ |0.2436|± |0.0281|
|mmlu_medical_genetics | 0|none | 0|acc |↑ |0.3300|± |0.0473|
|mmlu_miscellaneous | 0|none | 0|acc |↑ |0.2452|± |0.0154|
|mmlu_moral_disputes | 0|none | 0|acc |↑ |0.2572|± |0.0235|
|mmlu_moral_scenarios | 0|none | 0|acc |↑ |0.2391|± |0.0143|
|mmlu_nutrition | 0|none | 0|acc |↑ |0.2092|± |0.0233|
|mmlu_other |N/A |none | 0|acc |↑ |0.2556|± |0.0078|
|mmlu_philosophy | 0|none | 0|acc |↑ |0.2637|± |0.0250|
|mmlu_prehistory | 0|none | 0|acc |↑ |0.2531|± |0.0242|
|mmlu_professional_accounting | 0|none | 0|acc |↑ |0.2766|± |0.0267|
|mmlu_professional_law | 0|none | 0|acc |↑ |0.2438|± |0.0110|
|mmlu_professional_medicine | 0|none | 0|acc |↑ |0.2022|± |0.0244|
|mmlu_professional_psychology | 0|none | 0|acc |↑ |0.2598|± |0.0177|
|mmlu_public_relations | 0|none | 0|acc |↑ |0.2818|± |0.0431|
|mmlu_security_studies | 0|none | 0|acc |↑ |0.1714|± |0.0241|
|mmlu_social_sciences |N/A |none | 0|acc |↑ |0.2379|± |0.0077|
|mmlu_sociology | 0|none | 0|acc |↑ |0.2836|± |0.0319|
|mmlu_stem |N/A |none | 0|acc |↑ |0.2474|± |0.0077|
|mmlu_us_foreign_policy | 0|none | 0|acc |↑ |0.2400|± |0.0429|
|mmlu_virology | 0|none | 0|acc |↑ |0.3133|± |0.0361|
|mmlu_world_religions | 0|none | 0|acc |↑ |0.2515|± |0.0333|
|piqa | 1|none | 0|acc |↑ |0.6279|± |0.0113|
| | |none | 0|acc_norm |↑ |0.6289|± |0.0113|
|winogrande | 1|none | 0|acc |↑ |0.4862|± |0.0140|

| Groups |Version|Filter|n-shot| Metric | |Value | |Stderr|
|--------------------|-------|------|-----:|--------|---|-----:|---|-----:|
|ceval-valid |N/A |none | 0|acc |↑ |0.2623|± |0.0120|
|cmmlu |N/A |none | 0|acc |↑ |0.2475|± |0.0040|
| | |none | 0|acc_norm|↑ |0.2475|± |0.0040|
|mmlu |N/A |none | 0|acc |↑ |0.2479|± |0.0036|
|mmlu_humanities |N/A |none | 0|acc |↑ |0.2497|± |0.0063|
|mmlu_other |N/A |none | 0|acc |↑ |0.2556|± |0.0078|
|mmlu_social_sciences|N/A |none | 0|acc |↑ |0.2379|± |0.0077|
|mmlu_stem |N/A |none | 0|acc |↑ |0.2474|± |0.0077|
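The tables above are in lm-evaluation-harness output format (the first line is the harness's run-summary header). As a reference point only, here is a minimal sketch of how such a run could be reproduced through the harness's Python API. It assumes lm-evaluation-harness v0.4+, which exposes `simple_evaluate` and `make_table`; the task list is inferred from the top-level rows of the table, and everything else mirrors the header line.

```python
# Minimal reproduction sketch (assumes EleutherAI lm-evaluation-harness >= 0.4;
# this code is not part of the original log).
import lm_eval
from lm_eval.utils import make_table

results = lm_eval.simple_evaluate(
    model="hf",  # HuggingFace causal-LM backend, as named in the header line
    model_args=(
        "pretrained=/apdcephfs_qy3/share_301069248/users/rummyyang/"
        "LLaMA-Factory/saves/llama3-1b/lora/pretrain/sft/checkpoint-4500"
    ),
    # Top-level tasks inferred from the table; the harness expands the
    # ceval-valid, cmmlu, and mmlu groups into their subtask rows itself.
    tasks=[
        "arc_challenge", "arc_easy", "ceval-valid", "cmmlu",
        "gsm8k_cot", "hellaswag", "mmlu", "piqa", "winogrande",
    ],
    num_fewshot=None,  # per-task defaults, matching "num_fewshot: None"
    batch_size=128,    # matching "batch_size: 128"
)
print(make_table(results))                # the "Tasks" table above
if "groups" in results:
    print(make_table(results, "groups"))  # the "Groups" table above
```

Note that `gsm8k_cot` reports n-shot 8 while the header says `num_fewshot: None`; this is consistent with leaving the few-shot count at the task's own default (the gsm8k_cot config is 8-shot), so the sketch does not override it.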