Spaces:
Runtime error
Runtime error
split added
Browse files- src/data/process_data.py +8 -0
src/data/process_data.py
CHANGED
@@ -1,10 +1,18 @@
|
|
1 |
import pandas as pd
|
|
|
|
|
2 |
|
3 |
|
4 |
def process_data(split='train'):
    """Copy the raw CSV for ``split`` to the processed directory with fixed headers.

    Loads ``data/raw/<split>.csv``, relabels its three columns as
    ``'Unnamed: 0'``, ``'input_text'`` and ``'output_text'``, and saves the
    result to ``data/processed/<split>.csv``. Both paths are relative to the
    current working directory.

    Args:
        split: Base name (without extension) of the CSV file to process.
    """
    source_path = 'data/raw/{}.csv'.format(split)
    target_path = 'data/processed/{}.csv'.format(split)

    frame = pd.read_csv(source_path)
    # Positional relabelling: pandas rejects this if the file does not have
    # exactly three columns.
    frame.columns = ['Unnamed: 0', 'input_text', 'output_text']
    frame.to_csv(target_path)
|
9 |
|
10 |
|
|
|
1 |
import pandas as pd
|
2 |
+
import yaml
|
3 |
+
import os
|
4 |
|
5 |
|
6 |
def process_data(split='train'):
    """Sample the raw CSV for ``split`` and write it to the processed directory.

    Reads the sampling fraction from the ``split`` key of ``params.yml``,
    loads ``data/raw/<split>.csv``, relabels its three columns, draws a
    reproducible sample (``random_state=1``) with replacement, writes the
    sample to ``data/processed/<split>.csv`` and then deletes the raw file.
    All paths are relative to the current working directory.

    Args:
        split: Base name (without extension) of the CSV file to process.

    Raises:
        FileNotFoundError: If ``params.yml`` or the raw CSV does not exist.
        KeyError: If the mapping loaded from ``params.yml`` has no ``split`` key.
        ValueError: If the raw CSV does not have exactly three columns.
    """
    with open("params.yml") as f:
        params = yaml.safe_load(f)

    # Build the raw path once so the read and the cleanup below are
    # guaranteed to refer to the same file.
    raw_path = 'data/raw/{}.csv'.format(split)

    df = pd.read_csv(raw_path)
    df.columns = ['Unnamed: 0', 'input_text', 'output_text']
    # NOTE(review): replace=True samples with replacement, so the output can
    # contain repeated rows even when the fraction is below 1 - confirm that
    # this is intended.
    df = df.sample(frac=params['split'], replace=True, random_state=1)

    # Persist the processed file before touching the raw one, so a failed
    # write cannot leave us without either copy of the data.
    df.to_csv('data/processed/{}.csv'.format(split))

    # The cleanup previously tested the unformatted template
    # "data/raw/{}.csv" (a file literally named "{}.csv") and therefore never
    # removed the raw file for the requested split.
    if os.path.exists(raw_path):
        os.remove(raw_path)
|
17 |
|
18 |
|