Spaces:
Runtime error
Runtime error
import pandas as pd | |
import yaml | |
import os | |
def process_data(split='train'):
    """Resample one raw dataset split and write it to ``data/processed``.

    Reads ``params.yml`` from the current working directory, loads
    ``data/raw/<split>.csv``, renames its columns, draws a random sample
    and saves the result as ``data/processed/<split>.csv``.

    Args:
        split: Base name (without ``.csv``) of the file to process.

    Raises:
        FileNotFoundError: If ``params.yml`` or the raw CSV is missing.
        KeyError: If the mapping loaded from ``params.yml`` has no
            ``split`` key.
        ValueError: If the raw CSV does not have exactly three columns.
    """
    with open("params.yml") as f:
        params = yaml.safe_load(f)

    df = pd.read_csv('data/raw/{}.csv'.format(split))
    # Assigning three names requires the raw file to have exactly three
    # columns; pandas raises ValueError on a length mismatch.
    df.columns = ['Unnamed: 0', 'input_text', 'output_text']

    # params['split'] is used as the fraction of rows to draw.
    # replace=True samples with replacement, so a row can appear more than
    # once; random_state=1 keeps the draw reproducible between runs.
    df = df.sample(frac=params['split'], replace=True, random_state=1)

    out_path = 'data/processed/{}.csv'.format(split)
    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before writing.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # The DataFrame index is written too (pandas default), as before.
    df.to_csv(out_path)
if __name__ == '__main__':
    # Script entry point: process each dataset split in turn.
    for split_name in ('train', 'test', 'validation'):
        process_data(split=split_name)