summarization/src/data/process_data.py
gagan3012's picture
updates
f9cfbca
raw
history blame
432 Bytes
import pandas as pd
def process_data(split: str = 'train') -> None:
    """Extract the ``article`` and ``highlights`` columns of one dataset split.

    Reads ``summarization/data/raw/<split>.csv`` and writes a CSV holding
    only those two columns to ``summarization/data/processed/<split>.csv``.
    Both paths are relative to the current working directory.

    Args:
        split: Base name (without ``.csv``) of the file to process.

    Raises:
        FileNotFoundError: If the raw CSV for ``split`` does not exist.
        KeyError: If the raw CSV lacks an ``article`` or ``highlights`` column.
    """
    # Imported locally so the block does not rely on a file-level ``import os``.
    import os

    raw_path = 'summarization/data/raw/{}.csv'.format(split)
    processed_path = 'summarization/data/processed/{}.csv'.format(split)

    # pandas has no ``load_csv``; ``read_csv`` is the CSV reader.
    dataset = pd.read_csv(raw_path)
    df = dataset[['article', 'highlights']]

    # ``to_csv`` does not create missing parent directories.
    os.makedirs(os.path.dirname(processed_path), exist_ok=True)

    # The row index is written as the first column (pandas default), exactly
    # as the original ``to_csv`` call did.
    df.to_csv(processed_path)
# Script entry point: process every dataset split when the file is executed
# directly. The guard must compare against '__main__' -- the original compared
# against '__name__', which is never true, so nothing ever ran.
if __name__ == '__main__':
    for split_name in ('train', 'test', 'validation'):
        process_data(split=split_name)