Spaces:
Runtime error
Runtime error
File size: 583 Bytes
9988244 7140c69 9988244 f9cfbca f49c162 c6e4955 f49c162 f9cfbca c6e4955 f49c162 6d7cb20 5322cc9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
import pandas as pd
import yaml
def process_data(frac=0.1, split="train"):
df = pd.read_csv("data/raw/{}.csv".format(split))
df.columns = ["Unnamed: 0", "input_text", "output_text"]
df_new = df.sample(frac=frac, replace=True, random_state=1)
df_new.to_csv("data/processed/{}.csv".format(split))
if __name__ == "__main__":
with open("data_params.yml") as f:
params = yaml.safe_load(f)
process_data(frac=params["split"], split="train")
process_data(frac=params["split"], split="test")
process_data(frac=params["split"], split="validation")
|