File size: 1,136 Bytes
8e1a6c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
from huggingface_hub import login
from datasets import load_dataset
from renumics import spotlight

# Hub namespace that owns every Easy2Hard repository. Read from the
# environment, so a missing HF_USERNAME raises KeyError as soon as this
# module is executed.
_HF_NAMESPACE = os.environ["HF_USERNAME"]

# One row per dataset: (repository name suffix, config name, split).
_EASY2HARD_SOURCES = (
    ("AMC", "v1", "default"),
    ("Lichess", "v1", "default"),
    ("ARC", "v1", "test"),
    ("GSM8K", "v1", "test"),
    ("HellaSwag", "v1", "validation"),
    ("Winogrande", "v1", "validation"),
    ("Leaderboard", "v1", "default"),
)

# Each entry is (repository id, config name, split). The script body passes
# them to `load_dataset` as the first positional argument, the second
# positional argument and `split=` respectively.
DATASET_LIST = [
    (_HF_NAMESPACE + "/Easy2Hard-" + suffix, config_name, split_name)
    for suffix, config_name, split_name in _EASY2HARD_SOURCES
]


if __name__ == "__main__":
    # Authenticate against the Hugging Face Hub; the token must be provided
    # through the HF_TOKEN environment variable (KeyError otherwise).
    login(token=os.environ["HF_TOKEN"])

    # Download every configured dataset and write each one to its own
    # parquet file in the current working directory. The file name is the
    # repository id (with "/" replaced by "_"), the config name and the
    # split, joined by underscores.
    for repo_id, config_name, split_name in DATASET_LIST:
        ds = load_dataset(repo_id, config_name, split=split_name)
        parquet_name = (
            "_".join((repo_id.replace("/", "_"), config_name, split_name))
            + ".parquet"
        )
        ds.to_parquet(parquet_name)

    # `ds` still refers to the dataset loaded in the final loop iteration,
    # so that is the one Spotlight opens with. The parquet files written
    # above live in the served folder ("."), and file browsing is enabled.
    initial_frame = ds.to_pandas()
    viewer_options = {
        "folder": ".",
        "port": 7860,
        "host": "0.0.0.0",
        "allow_filebrowsing": True,
        # Ask Spotlight to wait indefinitely instead of returning.
        "wait": "forever",
    }
    view = spotlight.show(dataset=initial_frame, **viewer_options)