Spaces:
Runtime error
Runtime error
Update
Browse files
- dvc.yaml +1 -0
- src/data/make_dataset.py +3 -3
- src/data/process_data.py +8 -0
dvc.yaml
CHANGED
@@ -3,6 +3,7 @@ stages:
|
|
3 |
cmd: python src/models/train_model.py
|
4 |
deps:
|
5 |
- data/processed/train.csv
|
|
|
6 |
- src/models/train_model.py
|
7 |
outs:
|
8 |
- models:
|
|
|
3 |
cmd: python src/models/train_model.py
|
4 |
deps:
|
5 |
- data/processed/train.csv
|
6 |
+
- data/processed/validation.csv
|
7 |
- src/models/train_model.py
|
8 |
outs:
|
9 |
- models:
|
src/data/make_dataset.py
CHANGED
@@ -6,9 +6,9 @@ def make_dataset(dataset='cnn_dailymail', split='train'):
|
|
6 |
"""make dataset for summarisation"""
|
7 |
dataset = load_dataset(dataset, '3.0.0', split=split)
|
8 |
df = pd.DataFrame()
|
9 |
-
df['
|
10 |
-
df['
|
11 |
-
df.to_csv('
|
12 |
|
13 |
|
14 |
if __name__ == '__main__':
|
|
|
6 |
"""make dataset for summarisation"""
|
7 |
dataset = load_dataset(dataset, '3.0.0', split=split)
|
8 |
df = pd.DataFrame()
|
9 |
+
df['article'] = dataset['article']
|
10 |
+
df['highlights'] = dataset['highlights']
|
11 |
+
df.to_csv('summarization/data/raw/{}.csv'.format(split))
|
12 |
|
13 |
|
14 |
if __name__ == '__main__':
|
src/data/process_data.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
def process_data(split='train'):
    """Copy the summarisation columns of a raw split into the processed folder.

    Reads ``summarization/data/raw/<split>.csv``, keeps only the ``article``
    and ``highlights`` columns, and writes them to
    ``summarization/data/processed/<split>.csv``. Both paths are relative to
    the current working directory.

    Args:
        split: Name of the dataset split; used as the CSV file stem for both
            the input and the output file.

    Returns:
        None. The result is the CSV file written to disk.

    Raises:
        FileNotFoundError: If the raw CSV for ``split`` does not exist.
        KeyError: If the raw CSV lacks an ``article`` or ``highlights`` column.
    """
    # pandas exposes ``read_csv``; ``pd.load_csv`` does not exist and raised
    # AttributeError on every call.
    dataset = pd.read_csv('summarization/data/raw/{}.csv'.format(split))

    # Selecting by name drops every other column of the raw file (for example
    # an index column written by an earlier ``to_csv`` call).
    df = dataset[['article', 'highlights']]

    # The row index is still written as the first CSV column (pandas default),
    # as before. NOTE(review): confirm the downstream reader expects this.
    df.to_csv('summarization/data/processed/{}.csv'.format(split))
|