gagan3012 committed on
Commit
99d28ef
·
1 Parent(s): da86775
Files changed (2) hide show
  1. dvc.yaml +1 -0
  2. src/data/process_data.py +1 -1
dvc.yaml CHANGED
@@ -9,6 +9,7 @@ stages:
9
  process_data:
10
  cmd: python src/data/process_data.py
11
  deps:
 
12
  - src/data/process_data.py
13
  outs:
14
  - data/processed/test.csv:
 
9
  process_data:
10
  cmd: python src/data/process_data.py
11
  deps:
12
+ - data/raw
13
  - src/data/process_data.py
14
  outs:
15
  - data/processed/test.csv:
src/data/process_data.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  def process_data(split='train'):
5
  df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
6
  df.rename(columns={"article": "input_text", "highlights": "output_text"})
7
- print(df.shape)
8
  df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
9
 
10
 
 
4
  def process_data(split='train'):
5
  df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
6
  df.rename(columns={"article": "input_text", "highlights": "output_text"})
7
+ print(df.shape())
8
  df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
9
 
10