Spaces:
Runtime error
Runtime error
split added
Browse files - src/data/make_dataset.py +5 -1
src/data/make_dataset.py
CHANGED
@@ -1,15 +1,19 @@
|
|
1 |
import yaml
|
2 |
from datasets import load_dataset
|
3 |
import pandas as pd
|
|
|
|
|
4 |
|
5 |
|
6 |
def make_dataset(dataset='cnn_dailymail', split='train'):
    """Load one split of a summarisation dataset into a DataFrame.

    Args:
        dataset: Dataset name handed to ``load_dataset``; the '3.0.0'
            configuration is always requested.
        split: Name of the split to load.

    Returns:
        None. The frame built here is neither returned nor written out
        in this version of the function.
    """
    records = load_dataset(dataset, '3.0.0', split=split)
    frame = pd.DataFrame()
    # Copy only the two text columns, in the same order as before.
    for column in ('article', 'highlights'):
        frame[column] = records[column]
|
12 |
-
|
13 |
|
14 |
|
15 |
if __name__ == '__main__':
|
|
|
1 |
import yaml
|
2 |
from datasets import load_dataset
|
3 |
import pandas as pd
|
4 |
+
import os
|
5 |
+
|
6 |
|
7 |
|
8 |
def make_dataset(dataset='cnn_dailymail', split='train'):
    """Load one split of a summarisation dataset and save it as CSV.

    Args:
        dataset: Dataset name handed to ``load_dataset``; the '3.0.0'
            configuration is always requested.
        split: Name of the split to load. It is also used as the stem of
            the output file name.

    Returns:
        None. The result is the file ``data/raw/<split>.csv`` (relative
        to the current working directory) holding the 'article' and
        'highlights' columns, plus the DataFrame index that ``to_csv``
        writes by default.
    """
    # exist_ok=True replaces the exists()/makedirs() pair: no window in
    # which another process can create the directory between the check
    # and the call, and no error when it is already there.
    os.makedirs('data/raw', exist_ok=True)

    # Keep the loaded object under its own name instead of rebinding the
    # ``dataset`` parameter, so the argument stays readable below.
    records = load_dataset(dataset, '3.0.0', split=split)

    df = pd.DataFrame()
    df['article'] = records['article']
    df['highlights'] = records['highlights']
    df.to_csv('data/raw/{}.csv'.format(split))
|
17 |
|
18 |
|
19 |
if __name__ == '__main__':
|