Spaces:
Build error
Build error
File size: 2,828 Bytes
783053f d57c931 783053f d57c931 783053f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
from sklearn.model_selection import train_test_split
from glob import glob
import shutil
import os
def split_data_from_dir(path: str, new_path: str, test_size: float = 0.2, valid_size: float = 0.2, force_placement: bool = True) -> None:
    """Split a class-per-folder dataset into train, test and valid folders.

    Every sub-directory of ``path`` is treated as one class. The regular
    files it contains are split with ``train_test_split`` and copied (the
    originals are left in place) to ``new_path/<subset>/<class>/`` where
    ``<subset>`` is ``train``, ``test`` or ``valid``.

    Args:
        path: Existing directory holding one sub-directory per class.
        new_path: Existing directory that receives the ``train``, ``test``
            and ``valid`` trees.
        test_size: Fraction of each class (relative to the whole class)
            copied to the test set; must satisfy ``0 < test_size < 0.5``.
        valid_size: Fraction of each class (relative to the whole class)
            copied to the validation set; must satisfy
            ``0 <= valid_size < 0.5``. With ``0`` the validation folder is
            created but left empty.
        force_placement: When true, already existing target directories are
            reused. When false, an existing target directory is an error.

    Raises:
        AssertionError: If a size is out of range or ``path`` / ``new_path``
            is not an existing directory.
        OSError: If ``force_placement`` is false and at least one target
            directory of a class already exists.

    Errors raised by ``train_test_split`` (for instance when a class holds
    too few files to split) and by the copy itself are propagated unchanged.
    """
    assert 0 < test_size < 0.5 and 0 <= valid_size < 0.5
    assert os.path.exists(path) and os.path.isdir(path)
    assert os.path.exists(new_path) and os.path.isdir(new_path)

    subsets = ('train', 'test', 'valid')
    held_out_size = test_size + valid_size

    for dir_ in os.listdir(path):
        dir_path = os.path.join(path, dir_)
        # Only sub-directories are classes; loose files at the root are skipped.
        if not os.path.isdir(dir_path):
            continue

        targets = {subset: os.path.join(new_path, subset, dir_) for subset in subsets}

        # Refuse before splitting or creating anything, and do so as soon as
        # ANY target exists, which is what the error message announces.
        if not force_placement and any(os.path.exists(target) for target in targets.values()):
            raise OSError(f"One of the training, validation or testing directory for the class {dir_} already exists! Enable the force_placement argument if you want to use already created directories.")

        # copyfile only handles regular files, so nested directories are ignored.
        images = [name for name in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, name))]

        # First split: training set versus everything that is held out.
        train_set, test_valid_set = train_test_split(images, test_size=held_out_size)

        if valid_size > 0:
            # valid_size is expressed relative to the whole class, but this
            # second split only sees the held-out part, so rescale it.
            test_set, valid_set = train_test_split(test_valid_set, test_size=valid_size / held_out_size)
        else:
            # train_test_split rejects a zero test_size: no validation files.
            test_set, valid_set = test_valid_set, []

        for subset, names in zip(subsets, (train_set, test_set, valid_set)):
            # exist_ok lets a partially created tree be completed and reused.
            os.makedirs(targets[subset], exist_ok=True)
            for name in names:
                shutil.copyfile(os.path.join(dir_path, name), os.path.join(targets[subset], name))

    print(f"All the file in {path} was copied in {new_path} successfully!")
|