gagan3012 committed on
Commit
f6b4508
·
1 Parent(s): 2dbfcc7

split added

Browse files
Files changed (1) hide show
  1. src/data/make_dataset.py +5 -1
src/data/make_dataset.py CHANGED
@@ -1,15 +1,19 @@
1
  import yaml
2
  from datasets import load_dataset
3
  import pandas as pd
 
 
4
 
5
 
6
  def make_dataset(dataset='cnn_dailymail', split='train'):
7
  """make dataset for summarisation"""
 
 
8
  dataset = load_dataset(dataset, '3.0.0', split=split)
9
  df = pd.DataFrame()
10
  df['article'] = dataset['article']
11
  df['highlights'] = dataset['highlights']
12
- #df.to_csv('data/raw/{}.csv'.format(split))
13
 
14
 
15
  if __name__ == '__main__':
 
1
  import yaml
2
  from datasets import load_dataset
3
  import pandas as pd
4
+ import os
5
+
6
 
7
 
8
  def make_dataset(dataset='cnn_dailymail', split='train'):
9
  """make dataset for summarisation"""
10
+ if not os.path.exists('data/raw'):
11
+ os.makedirs('data/raw')
12
  dataset = load_dataset(dataset, '3.0.0', split=split)
13
  df = pd.DataFrame()
14
  df['article'] = dataset['article']
15
  df['highlights'] = dataset['highlights']
16
+ df.to_csv('data/raw/{}.csv'.format(split))
17
 
18
 
19
  if __name__ == '__main__':