Spaces:
Runtime error
Runtime error
updates
Browse files
- dvc.yaml +1 -0
- src/data/process_data.py +1 -1
dvc.yaml
CHANGED
@@ -9,6 +9,7 @@ stages:
|
|
9 |
process_data:
|
10 |
cmd: python src/data/process_data.py
|
11 |
deps:
|
|
|
12 |
- src/data/process_data.py
|
13 |
outs:
|
14 |
- data/processed/test.csv:
|
|
|
9 |
process_data:
|
10 |
cmd: python src/data/process_data.py
|
11 |
deps:
|
12 |
+
- data/raw
|
13 |
- src/data/process_data.py
|
14 |
outs:
|
15 |
- data/processed/test.csv:
|
src/data/process_data.py
CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
|
|
4 |
def process_data(split='train'):
    """Load one raw dataset split, rename its text columns, and save it.

    Reads ``<split>.csv`` from the raw data directory, renames the
    ``article`` column to ``input_text`` and ``highlights`` to
    ``output_text``, prints the frame's shape, and writes the result to
    ``<split>.csv`` in the processed data directory.

    Args:
        split: Base name (without extension) of the CSV file to process.
    """
    # NOTE(review): both paths are absolute and machine-specific, so this
    # only runs where that exact directory layout exists.
    df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
    # rename() returns a new DataFrame unless inplace=True; keep the result,
    # otherwise the original column names are what gets written out.
    df = df.rename(columns={"article": "input_text", "highlights": "output_text"})
    print(df.shape)
    df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
|
9 |
|
10 |
|
|
|
4 |
def process_data(split='train'):
    """Load one raw dataset split, rename its text columns, and save it.

    Reads ``<split>.csv`` from the raw data directory, renames the
    ``article`` column to ``input_text`` and ``highlights`` to
    ``output_text``, prints the frame's shape, and writes the result to
    ``<split>.csv`` in the processed data directory.

    Args:
        split: Base name (without extension) of the CSV file to process.
    """
    # NOTE(review): both paths are absolute and machine-specific, so this
    # only runs where that exact directory layout exists.
    df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
    # rename() returns a new DataFrame unless inplace=True; keep the result,
    # otherwise the original column names are what gets written out.
    df = df.rename(columns={"article": "input_text", "highlights": "output_text"})
    # shape is a (rows, columns) tuple attribute, not a method; calling it
    # raises TypeError.
    print(df.shape)
    df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
|
9 |
|
10 |
|