Tianyinus's picture
init submit
edcf5ee verified
"""
Organize the dataset so that every image is stored in JPG format. Version: Jan 9th 15:30, official release.
"""
import os
import re
import csv
import shutil
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torchvision.transforms
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
def del_file(filepath):
    """
    Delete all files and folders in one directory, keeping the directory itself.

    Symbolic links are unlinked instead of being followed, so a link that points
    at a directory (or at a target that no longer exists) is removed without
    touching whatever it points to.

    :param filepath: path of the directory to empty
    :return: None
    """
    for name in os.listdir(filepath):
        entry_path = os.path.join(filepath, name)
        # Only real directories go through rmtree: shutil.rmtree refuses to
        # operate on a symbolic link.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            # Regular files, symlinks (including dangling ones) and any other
            # non-directory entry are removed with a plain unlink.
            os.remove(entry_path)
def make_and_clear_path(file_pack_path):
    """
    Make sure a directory exists and is empty.

    The directory (with any missing parents) is created when absent; whatever
    it contains afterwards is deleted through del_file.

    :param file_pack_path: path of the directory to create and empty
    :return: None
    """
    # exist_ok replaces the separate os.path.exists test, so there is no window
    # between checking for the directory and creating it.
    os.makedirs(file_pack_path, exist_ok=True)
    del_file(file_pack_path)
def find_all_files(root, suffix=None):
    """
    Walk a directory tree and collect file paths, optionally filtered by suffix.

    :param root: directory that is searched recursively with os.walk
    :param suffix: when not None, only file names for which
        name.endswith(suffix) is true are kept
    :return: list of paths, each joined from the walked folder and the file name
    """
    return [
        os.path.join(folder, name)
        for folder, _, names in os.walk(root)
        for name in names
        if suffix is None or name.endswith(suffix)
    ]
def read_file(f_dir):
    """
    Open an image file with PIL.

    :param f_dir: path of the image file
    :return: the object returned by Image.open (a PIL image, not a numpy array)
    """
    return Image.open(f_dir)
def change_shape(image, corp_x=2400, corp_y=1800, f_x=1390, f_y=1038):
    """
    Center-crop an oversized image, then shrink it to fit inside f_x*f_y.

    :param image: PIL image to process
    :param corp_x: crop width; a crop is applied when the image is wider than this
    :param corp_y: crop height; a crop is applied when the image is taller than this
    :param f_x: maximum width passed to thumbnail()
    :param f_y: maximum height passed to thumbnail()
    :return: the processed image. thumbnail() modifies the image in place, so
        when no crop was needed this is the same object that was passed in.
    """
    if image.size[0] > corp_x or image.size[1] > corp_y:
        # CenterCrop takes its size as (height, width).
        # NOTE(review): the test is an OR, so an image that exceeds only one of
        # the two limits is still cropped to corp_x*corp_y; torchvision
        # presumably pads the smaller side in that case - confirm this is wanted.
        image = torchvision.transforms.CenterCrop((corp_y, corp_x))(image)

    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
    # and is available in old and new Pillow releases alike.
    image.thumbnail((f_x, f_y), Image.LANCZOS)
    return image
def save_file(f_image, save_dir, suffix='.jpg'):
    """
    Save an image to save_dir + suffix, creating the parent folder if needed.

    :param f_image: object exposing save(path); the callers in this file pass
        PIL images
    :param save_dir: target path WITHOUT the file extension
    :param suffix: extension appended to save_dir to form the final file name
    :return: None
    """
    parent = os.path.dirname(save_dir)
    # An empty parent means save_dir is a bare name in the current directory:
    # there is nothing to create, and os.makedirs('') would raise.
    if parent:
        os.makedirs(parent, exist_ok=True)
    f_image.save(save_dir + suffix)
def PC_to_stander(root_from=r'C:\Users\admin\Desktop\dataset\PC',
                  root_positive=r'C:\Users\admin\Desktop\jpg_dataset\P',
                  root_negative=r'C:\Users\admin\Desktop\jpg_dataset\N', corp_x=2400, corp_y=1800, f_x=1390, f_y=1038):
    """
    Convert a folder tree of .jpg images into numbered, resized images split
    into a positive and a negative folder.

    The parent folder of root_positive is created if missing and EMPTIED before
    anything is written. Every '.jpg' file found under root_from is reshaped
    with change_shape and saved as '<n>.jpg' (n counts from 1 in discovery
    order) under root_negative when its path contains one of the negative
    keywords, otherwise under root_positive. A 'name_dict.csv' file mapping each
    new path (without extension) to its original path is written to that
    parent folder.

    :param root_from: folder that is searched recursively for '.jpg' files
    :param root_positive: output folder for images not matched as negative
    :param root_negative: output folder for images whose path holds a negative keyword
    :param corp_x: crop width forwarded to change_shape
    :param corp_y: crop height forwarded to change_shape
    :param f_x: maximum output width forwarded to change_shape
    :param f_y: maximum output height forwarded to change_shape
    :return: None
    """
    output_root = os.path.split(root_positive)[0]
    make_and_clear_path(output_root)

    source_paths = find_all_files(root=root_from, suffix='.jpg')

    renamed = {}  # new path (no extension) -> original path
    sizes_before = []  # distinct image sizes as read from disk
    sizes_after = []  # distinct image sizes after change_shape

    for number, source in enumerate(tqdm(source_paths), start=1):
        # Path keywords marking a negative sample: non-cancer / negative / benign.
        is_negative = '非癌' in source or '阴性' in source or '良性' in source
        class_root = root_negative if is_negative else root_positive

        picture = read_file(source)
        before = (picture.size[0], picture.size[1])
        if before not in sizes_before:
            sizes_before.append(before)

        picture = change_shape(picture, corp_x=corp_x, corp_y=corp_y, f_x=f_x, f_y=f_y)
        after = (picture.size[0], picture.size[1])
        if after not in sizes_after:
            sizes_after.append(after)

        # save_file appends the extension, so the recorded name has none.
        destination = os.path.join(class_root, str(number))
        renamed[destination] = source
        save_file(picture, destination)

    print('old size type:', sizes_before)
    print('size type: ', sizes_after)

    table = pd.DataFrame.from_dict(renamed, orient='index', columns=['origin path'])
    table.to_csv(os.path.join(output_root, 'name_dict.csv'))
def trans_csv_folder_to_imagefoder(target_path=r'C:\Users\admin\Desktop\MRAS_SEED_dataset',
                                   original_path=r'C:\Users\admin\Desktop\dataset\MARS_SEED_Dataset\train\train_org_image',
                                   csv_path=r'C:\Users\admin\Desktop\dataset\MARS_SEED_Dataset\train\train_label.csv'):
    """
    Original data format: a folder with images inside + a csv file with a header
    that lists the name and category of every image.
    Process the original dataset into image-folder format: one sub-folder per
    category under target_path, each holding copies of its images.

    target_path is created if missing and EMPTIED before copying starts. The
    csv file is read first, so an unreadable csv leaves target_path untouched.

    :param target_path: the path of target image folder
    :param original_path: The folder with images
    :param csv_path: A csv file with header; column 0 is the image file name and
        column 1 is its category
    :return: None
    """
    # newline='' is how the csv module asks files to be opened, so that quoted
    # fields containing line breaks are parsed correctly.
    with open(csv_path, "rt", encoding="utf-8", newline="") as csvfile:
        rows = list(csv.reader(csvfile))

    make_and_clear_path(target_path)  # Clear target_path

    copied = 0
    for row in tqdm(rows[1:]):  # rows[0] is the header
        if not row:
            # csv.reader yields an empty list for a blank line; there is
            # nothing to copy for it.
            continue
        class_dir = os.path.join(target_path, row[1])
        os.makedirs(class_dir, exist_ok=True)
        shutil.copy(os.path.join(original_path, row[0]), class_dir)
        copied += 1

    print('total num:', copied)
if __name__ == '__main__':
    # Script entry point: convert the ROSE_2112 folder into the
    # Positive/Negative jpg dataset with this run's crop and output sizes.
    PC_to_stander(
        root_from=r'../Desktop/ROSE_2112',
        root_positive=r'../Desktop/jpg_dataset/Positive',
        root_negative=r'../Desktop/jpg_dataset/Negative',
        corp_x=5280,
        corp_y=3956,
        f_x=1390,
        f_y=1038,
    )