|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
|
import datasets |
|
import pandas as pd |
|
|
|
|
|
# BibTeX entry for the MMLU paper; handed to DatasetInfo as `citation`.
_CITATION = """\
@article{hendryckstest2021,
title={Measuring Massive Multitask Language Understanding},
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
journal={Proceedings of the International Conference on Learning Representations (ICLR)},
year={2021}
}
"""

# Human-readable summary; handed to DatasetInfo as `description`.
_DESCRIPTION = """\
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
"""

# Upstream project page; handed to DatasetInfo as `homepage`.
_HOMEPAGE = "https://github.com/hendrycks/test"

# License identifier; handed to DatasetInfo as `license`.
_LICENSE = "MIT"

# Archive passed to the download manager. The extracted tree is read as
# data/{test,val,dev}/<task>_<split>.csv by the builder below.
# NOTE(review): presumably resolved relative to this script's location -- confirm.
_URL = "mmlu.zip"
|
|
|
# The 57 subject names. Each entry becomes the name of one builder config and
# is also used to build the per-split CSV file names (`<task>_<split>.csv`),
# so the spelling must match the files inside the archive. The order here is
# the order in which the builder configs are declared.
task_list = [
    "high_school_european_history",
    "business_ethics",
    "clinical_knowledge",
    "medical_genetics",
    "high_school_us_history",
    "high_school_physics",
    "high_school_world_history",
    "virology",
    "high_school_microeconomics",
    "econometrics",
    "college_computer_science",
    "high_school_biology",
    "abstract_algebra",
    "professional_accounting",
    "philosophy",
    "professional_medicine",
    "nutrition",
    "global_facts",
    "machine_learning",
    "security_studies",
    "public_relations",
    "professional_psychology",
    "prehistory",
    "anatomy",
    "human_sexuality",
    "college_medicine",
    "high_school_government_and_politics",
    "college_chemistry",
    "logical_fallacies",
    "high_school_geography",
    "elementary_mathematics",
    "human_aging",
    "college_mathematics",
    "high_school_psychology",
    "formal_logic",
    "high_school_statistics",
    "international_law",
    "high_school_mathematics",
    "high_school_computer_science",
    "conceptual_physics",
    "miscellaneous",
    "high_school_chemistry",
    "marketing",
    "professional_law",
    "management",
    "college_physics",
    "jurisprudence",
    "world_religions",
    "sociology",
    "us_foreign_policy",
    "high_school_macroeconomics",
    "computer_security",
    "moral_scenarios",
    "moral_disputes",
    "electrical_engineering",
    "astronomy",
    "college_biology",
]
|
|
|
|
|
class MMLUConfig(datasets.BuilderConfig):
    """Builder configuration for a single MMLU subject.

    Every configuration is pinned to version 1.0.0; all other keyword
    arguments (e.g. ``name``) are forwarded untouched to
    ``datasets.BuilderConfig``.
    """

    def __init__(self, **kwargs):
        """Create the config, forwarding ``kwargs`` to the base class."""
        pinned_version = datasets.Version("1.0.0")
        super().__init__(version=pinned_version, **kwargs)
|
|
|
|
|
class MMLU(datasets.GeneratorBasedBuilder):
    """Dataset builder for MMLU with one builder config per subject.

    Each example is a four-way multiple-choice item with the string fields
    ``question``, ``A``, ``B``, ``C``, ``D`` and ``answer``.
    """

    # One config per entry of ``task_list``; the config name is the subject.
    BUILDER_CONFIGS = [MMLUConfig(name=task_name) for task_name in task_list]

    def _info(self):
        """Return the dataset metadata and the (all-string) feature schema."""
        features = datasets.Features(
            {
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download/extract the archive and describe the three splits.

        Args:
            dl_manager: download manager supplied by ``datasets``; used to
                fetch and unpack ``_URL``.

        Returns:
            A list of ``datasets.SplitGenerator`` in the order TEST,
            VALIDATION, TRAIN, each carrying the ``filepath`` of the CSV for
            the configured subject.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name

        # (split, folder) pairs. The folder name doubles as the file-name
        # suffix, i.e. data/<folder>/<task>_<folder>.csv. Note that the
        # archive's "dev" files are exposed as the TRAIN split.
        split_layout = [
            (datasets.Split.TEST, "test"),
            (datasets.Split.VALIDATION, "val"),
            (datasets.Split.TRAIN, "dev"),
        ]
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "data", folder, f"{task_name}_{folder}.csv"),
                },
            )
            for split, folder in split_layout
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs from one header-less CSV file.

        Args:
            filepath: path to a CSV with exactly six columns in the order
                question, A, B, C, D, answer.

        Yields:
            Tuples of the zero-based row index and a dict keyed by the six
            column names, with every value kept as the raw CSV text.

        Raises:
            ValueError: if the file does not have exactly six columns
                (raised by pandas when the column names are assigned).
        """
        # Read every cell verbatim as text. With pandas' defaults, numeric
        # looking choices are converted to int/float (so "4" can come back as
        # 4.0) and cells such as "None", "NA", "N/A" or "null" are turned into
        # NaN, which does not match the all-string schema declared in _info().
        df = pd.read_csv(filepath, header=None, dtype=str, keep_default_na=False)
        df.columns = ["question", "A", "B", "C", "D", "answer"]

        for i, instance in enumerate(df.to_dict(orient="records")):
            yield i, instance
|
|