|
import subprocess |
|
import pandas as pd |
|
import random |
|
|
|
taskname = "spaceship-titanic" |
|
download_dir = "../env" |
|
script_dir = f"." |
|
|
|
input(f"Consent to the competition at https://www.kaggle.com/competitions/{taskname}/data; Press any key") |
|
|
|
subprocess.run(["kaggle", "competitions", "download", "-c", taskname], cwd=download_dir) |
|
subprocess.run(["unzip", "-n", f"{taskname}.zip"], cwd=download_dir) |
|
subprocess.run(["rm", f"{taskname}.zip"], cwd=download_dir) |
|
|
|
trainset = pd.read_csv(f"{download_dir}/train.csv") |
|
trainset = trainset.reset_index(drop=True) |
|
trainset.iloc[:int(len(trainset)*0.8)].to_csv(f"{download_dir}/train.csv", index=False) |
|
testset = trainset.iloc[int(len(trainset)*0.8):] |
|
|
|
testset.drop(list(trainset.keys())[1:-1], axis=1).to_csv(f"{script_dir}/answer.csv", index=False) |
|
testset = testset.drop(['Transported'], axis=1).to_csv(f"{download_dir}/test.csv", index=False) |
|
|