Spaces:
Runtime error
Runtime error
File size: 776 Bytes
3f8d76d 9bbcc22 322ebac f6b4508 5d9f40a f6b4508 9bbcc22 c6e4955 322ebac c6e4955 322ebac c6e4955 c015c4c 0ee5810 c6e4955 d5a6d18 3f8d76d 5d9f40a c6e4955 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
import yaml
from datasets import load_dataset
import pandas as pd
import os
import pprint
def make_dataset(dataset="cnn_dailymail", split="train", *, config="3.0.0",
                 out_dir="data/raw"):
    """Download one split of a summarisation dataset and save it as CSV.

    The split is loaded with ``datasets.load_dataset`` and its ``article``
    and ``highlights`` columns are written to ``<out_dir>/<split>.csv``
    (the DataFrame index is written as the first CSV column).

    Args:
        dataset: Dataset name passed to ``load_dataset``.
        split: Split name passed to ``load_dataset``; also used as the
            output file's base name.
        config: Dataset configuration passed as the second positional
            argument of ``load_dataset``. Defaults to ``"3.0.0"``.
        out_dir: Directory the CSV is written into. Created if missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)

    # Use a separate name so the ``dataset`` argument is not shadowed.
    loaded = load_dataset(dataset, config, split=split)

    df = pd.DataFrame(
        {
            "article": loaded["article"],
            "highlights": loaded["highlights"],
        }
    )
    df.to_csv(os.path.join(out_dir, "{}.csv".format(split)))
if __name__ == "__main__":
    # Load the run parameters and echo them for the log.
    with open("data_params.yml") as params_file:
        params = yaml.safe_load(params_file)
    pprint.pprint(params)

    # Export every split of the configured dataset, in the same order as before.
    for split_name in ("train", "test", "validation"):
        make_dataset(dataset=params["data"], split=split_name)
|