Spaces:
Runtime error
Runtime error
updates
Browse files
- src/data/make_dataset.py +1 -1
- src/data/process_data.py +4 -5
src/data/make_dataset.py
CHANGED
@@ -8,7 +8,7 @@ def make_dataset(dataset='cnn_dailymail', split='train'):
|
|
8 |
df = pd.DataFrame()
|
9 |
df['article'] = dataset['article']
|
10 |
df['highlights'] = dataset['highlights']
|
11 |
-
df.to_csv('summarization/data/raw/{}.csv'.format(split))
|
12 |
|
13 |
|
14 |
if __name__ == '__main__':
|
|
|
8 |
df = pd.DataFrame()
|
9 |
df['article'] = dataset['article']
|
10 |
df['highlights'] = dataset['highlights']
|
11 |
+
df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
|
12 |
|
13 |
|
14 |
if __name__ == '__main__':
|
src/data/process_data.py
CHANGED
@@ -2,11 +2,10 @@ import pandas as pd
|
|
2 |
|
3 |
|
4 |
def process_data(split='train'):
|
5 |
-
df = pd.
|
6 |
-
|
7 |
-
df
|
8 |
-
df
|
9 |
-
df.to_csv('summarization/data/processed/{}.csv'.format(split))
|
10 |
|
11 |
|
12 |
if __name__ == '__name__':
|
|
|
2 |
|
3 |
|
4 |
def process_data(split='train'):
|
5 |
+
df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
|
6 |
+
df.rename(columns={"article": "input_text", "highlights": "output_text"})
|
7 |
+
print(df.shape)
|
8 |
+
df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
|
|
|
9 |
|
10 |
|
11 |
if __name__ == '__name__':
|