|
""" |
|
'JPG_cropping_960...' ver: 22 Nov 10

Crop pathology images into patches, using average filtering to screen for the useful pieces, which are mostly red/purple.

Specially modified version:
maximizes the efficiency of cropping at different sizes.
|
""" |
|
import os |
|
|
|
# Folder holding the OpenSlide Windows binaries; it has to be registered before
# `import openslide` so the DLLs can be located.
OPENSLIDE_DLL_DIR = r"D:\chrome_download\github220901\openslide-win64\bin"
# os.add_dll_directory only exists on Windows (Python 3.8+) and raises when the
# folder is missing, so guard both cases instead of crashing at import time.
if hasattr(os, "add_dll_directory") and os.path.isdir(OPENSLIDE_DLL_DIR):
    os.add_dll_directory(OPENSLIDE_DLL_DIR)
|
|
|
import openslide |
|
import shutil |
|
import PIL.Image as Image |
|
import numpy as np |
|
import openslide |
|
import torch |
|
from tqdm import tqdm |
|
import cv2 |
|
from torchvision import transforms |
|
from PIL import ImageFile |
|
import pandas as pd |
|
|
|
# Pillow settings: accept truncated image files and switch off the
# maximum-pixel-count guard, since the tiles handled here are very large.
ImageFile.LOAD_TRUNCATED_IMAGES = True
Image.MAX_IMAGE_PIXELS = None

# Reference microns-per-pixel; SVS_cut_to_patch divides it by a slide's own MPP
# to obtain the resize ratio.
STANDARD_MPP = 0.4942

# Square patch sizes, largest first: the slide-level tile followed by the three
# sizes forwarded to cut_to_patch.
patch_size = [(edge, edge) for edge in (3840, 960, 384, 96)]
|
|
|
|
|
def save_file(f_image, save_dir, suffix='.jpg'):
    """
    Write an image to ``save_dir + suffix`` with OpenCV, creating the parent
    folder when it does not exist yet.

    :param f_image: image to store; anything ``np.asarray`` accepts (the callers
        in this file pass PIL images converted to RGB)
    :param save_dir: destination path WITHOUT the file extension
    :param suffix: extension appended to ``save_dir``
    :return: None
    """
    filepath, _ = os.path.split(save_dir)
    # os.path.split returns '' for a bare file name and makedirs('') would raise;
    # exist_ok avoids failing when another writer creates the folder first.
    if filepath:
        os.makedirs(filepath, exist_ok=True)

    image_data = np.asarray(f_image)
    # cv2.imwrite interprets colour data as BGR(A) whereas the PIL images handed
    # in are RGB(A); without reordering, red and blue end up swapped on disk.
    if image_data.ndim == 3 and image_data.shape[2] in (3, 4):
        channel_order = [2, 1, 0] + list(range(3, image_data.shape[2]))
        image_data = np.ascontiguousarray(image_data[:, :, channel_order])

    # imwrite reports failure through its return value instead of raising.
    if not cv2.imwrite(save_dir + suffix, image_data):
        print('failed to write image :', save_dir + suffix)
|
|
|
|
|
def make_and_clear_path(file_pack_path):
    """
    Make sure the folder ``file_pack_path`` exists.

    Despite the name, nothing is cleared: an existing path is left untouched.

    :param file_pack_path: folder to create (intermediate folders included)
    :return: None
    """
    if not os.path.exists(file_pack_path):
        # exist_ok closes the window between the check above and the creation,
        # e.g. when several workers prepare the same output folder.
        os.makedirs(file_pack_path, exist_ok=True)
|
|
|
|
|
def find_all_files(root, suffix=None):
    """
    Recursively collect the paths of the files below ``root``.

    :param root: folder to walk
    :param suffix: ``None`` keeps every file, a string keeps the files whose name
        ends with it, a tuple/list keeps the files ending with any of its items
        (an empty tuple/list therefore keeps nothing)
    :return: list of file paths, or ``-1`` (after printing a message) when
        ``suffix`` is of an unsupported type
    """
    if suffix is None or isinstance(suffix, str):
        wanted = suffix
    elif isinstance(suffix, (tuple, list)):
        # str.endswith accepts a tuple of alternatives directly.
        wanted = tuple(suffix)
    else:
        print('type of suffix is not legal :', type(suffix))
        return -1

    res = []
    for folder, _, files in os.walk(root):
        for f in files:
            if wanted is None or f.endswith(wanted):
                res.append(os.path.join(folder, f))
    return res
|
|
|
|
|
def convert_to_npy(a_data_path, patch_size=(960, 960)):
    """
    Load an image file and return its top-left region as a numpy array.

    The region is ``factor * patch_size[0]`` wide and ``factor * patch_size[1]``
    high, where ``factor`` is the smaller of the whole-patch counts that fit
    along the width and along the height.

    :param a_data_path: path of the image file to read
    :param patch_size: int / tuple / list normalised by ``to_2tuple``
    :return: numpy array holding the cropped image
    """
    patch_size = to_2tuple(patch_size)

    # The context manager releases the file handle, which otherwise stays open
    # for every tile processed.
    with Image.open(a_data_path) as img:
        w, h = img.size
        factor = min(w // patch_size[0], h // patch_size[1])
        cropped = img.crop((0, 0, factor * patch_size[0], factor * patch_size[1]))
        # Materialise the pixels while the file is still open.
        return np.array(cropped)
|
|
|
|
|
class to_patch:
    """
    Split an image into non-overlapping patches, each of size ``patch_size``.

    The image is centre-cropped to the largest area made of whole patches
    before it is split.
    """

    def __init__(self, patch_size=(16, 16)):
        """
        :param patch_size: int / tuple / list normalised by ``to_2tuple``; the
            first value is used as patch height, the second as patch width
        """
        patch_size = to_2tuple(patch_size)
        self.patch_h = patch_size[0]
        self.patch_w = patch_size[1]

    def __call__(self, x):
        """
        :param x: array-like image laid out as (height, width, channels)
        :return: tensor of shape (num_patches, channels, patch_h, patch_w),
            patches ordered row by row; ``num_patches`` is 0 when the image is
            smaller than one patch
        """
        x = torch.tensor(x).permute(2, 0, 1)
        c, h, w = x.shape

        rows = h // self.patch_h
        cols = w // self.patch_w
        num_patches = rows * cols

        # Centre-crop to a whole number of patches in both directions.
        h_1 = rows * self.patch_h
        w_1 = cols * self.patch_w
        top = (h - h_1) // 2
        left = (w - w_1) // 2
        x = x[:, top:top + h_1, left:left + w_1]

        patches = x.view(c, rows, self.patch_h, cols, self.patch_w)
        patches = patches.permute(1, 3, 2, 4, 0)
        # Explicit sizes instead of -1: reshape cannot infer a dimension when
        # there are zero patches.
        patches = patches.reshape(num_patches, self.patch_h, self.patch_w, c)
        return patches.permute(0, 3, 1, 2)
|
|
|
|
|
def to_2tuple(input):
    """
    Normalise a size specification to a 2-tuple.

    :param input: an int ``n`` (gives ``(n, n)``) or a tuple/list; a sequence
        with one item is duplicated, longer sequences are cut to their first
        two items, and a 2-item tuple is returned as is
    :return: tuple with two items
    :raises ValueError: if the tuple/list is empty
    :raises TypeError: if ``input`` is of any other type
    """
    if isinstance(input, int):
        return (input, input)

    if isinstance(input, (tuple, list)):
        if isinstance(input, tuple) and len(input) == 2:
            return input
        if len(input) >= 2:
            return (input[0], input[1])
        if len(input) == 1:
            return (input[0], input[0])
        # An empty sequence used to fall through and return None, which only
        # failed later when a caller indexed the result.
        kind = 'tuple' if isinstance(input, tuple) else 'list'
        print('cannot handle none ' + kind)
        raise ValueError('cannot handle none ' + kind)

    print('cannot handle ', type(input))
    # Raising a plain tuple is itself a TypeError in Python 3; raise a real
    # exception that carries the intended message.
    raise TypeError('cannot handle ' + str(type(input)))
|
|
|
|
|
def pick_patch(patch):
    """
    Decide whether a patch has a suitable colour and should be kept.

    The patch is shrunk to a single pixel and kept when that pixel's first
    channel exceeds its second channel by at least 30.

    :param patch: image array accepted by ``array2img``
    :return: True to keep the patch, False to drop it
    """
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
    img_single = array2img(patch).resize((1, 1), Image.LANCZOS)
    r, g, _ = img_single.getpixel((0, 0))
    return r - g >= 30
|
|
|
|
|
def array2img(patch):
    """
    Turn an image array into an RGB PIL image.

    :param patch: array exposing ``astype``; values are cast to uint8 first
    :return: PIL image in RGB mode
    """
    as_uint8 = patch.astype('uint8')
    pil_image = Image.fromarray(as_uint8)
    return pil_image.convert('RGB')
|
|
|
|
|
def make_name(former_name, patch_size, patch_num):
    """
    Build the name of a sub-patch so that it carries the horizontal x and
    vertical y index on the original image, counted in steps of its own
    ``patch_size``.

    :param former_name: parent name shaped ``<image name>-<size>-<x>-<y>``
    :param patch_size: edge length of the sub-patch
    :param patch_num: 1-based, row-major index of the sub-patch in its parent
    :return: ``<image name>-<patch_size>-<x>-<y>`` (also printed)
    """
    fields = former_name.split('-')
    former_patch_size = int(fields[-3])
    former_x = int(fields[-2])
    former_y = int(fields[-1])
    # Everything in front of the last three '-' separated fields.
    img_real_name = former_name.rsplit('-', 3)[0]

    # Number of sub-patches along one edge of the parent patch.
    ratio = int(former_patch_size / patch_size)
    row, col = divmod(patch_num - 1, ratio)
    x = former_x * ratio + col
    y = former_y * ratio + row

    img_name = img_real_name + '-' + str(patch_size) + '-' + str(x) + '-' + str(y)
    print(img_name)
    return img_name
|
|
|
|
|
def SVS_cut_to_patch(img, save_root,
                     patch_size,
                     img_name,
                     class_name,
                     name_dir_3840, name_dir_0, name_dir_1, name_dir_2,
                     patient_folder=False,
                     L=True, M=True, S=False):
    """
    Tile one whole-slide image at level 0, keep the tiles that pass the colour
    filter, save them and pass each saved tile on to ``cut_to_patch``.

    The tile edge read from the slide is ``patch_size[0][0]`` when
    ``STANDARD_MPP / slide MPP`` lies strictly between 0.9 and 1.1; otherwise
    it is scaled by that ratio and the tile is resized to ``patch_size[0]``.
    The first and last tile column and row are not visited.

    :param img: path of the slide file
    :param save_root: root output folder
    :param patch_size: sequence of four 2-tuples; item 0 is the slide-level tile
        size, items 1-3 are forwarded to ``cut_to_patch``
    :param img_name: slide name used as prefix of every tile name
    :param class_name: name used for the output sub-folder and CSV file
    :param name_dir_3840: dict updated in place, tile path -> slide path
    :param name_dir_0: dict forwarded to ``cut_to_patch``
    :param name_dir_1: dict forwarded to ``cut_to_patch``
    :param name_dir_2: dict forwarded to ``cut_to_patch``
    :param patient_folder: when True, tiles are also saved in a per-slide folder
    :param L: forwarded to ``cut_to_patch``
    :param M: forwarded to ``cut_to_patch``
    :param S: forwarded to ``cut_to_patch``
    :return: None; any exception raised while tiling is printed, not re-raised
    """
    slide = openslide.open_slide(img)
    try:
        MPP = slide.properties[openslide.PROPERTY_NAME_MPP_X]
        print(MPP, img)
        resize_ratio = STANDARD_MPP / float(MPP)
        print(resize_ratio)

        top_size = patch_size[0][0]
        needs_resize = not 1.1 > resize_ratio > 0.9
        patch_size_num_0 = int(top_size * resize_ratio) if needs_resize else top_size
        print(patch_size_num_0)

        size_root = os.path.join(save_root, str(top_size))
        save_root_0 = os.path.join(size_root, class_name + '-' + str(top_size))
        make_and_clear_path(save_root_0)

        w, h = slide.level_dimensions[0]
        for i in range(1, w // patch_size_num_0 - 1):
            for j in range(1, h // patch_size_num_0 - 1):
                patch = slide.read_region((i * patch_size_num_0, j * patch_size_num_0), 0,
                                          (patch_size_num_0, patch_size_num_0))
                patch = patch.convert('RGB')

                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                if needs_resize:
                    patch = patch.resize(patch_size[0], Image.LANCZOS)

                # Colour filter on the tile shrunk to a single pixel.
                img_single = patch.resize((1, 1), Image.LANCZOS)
                r, g, b = img_single.getpixel((0, 0))
                if not (r < 220 and g < 220 and b < 220 and r > 100 and b > 30 and r > g + 20):
                    continue

                tile_name = img_name + '-' + str(top_size) + '-' + str(i) + '-' + str(j)
                tile_path = os.path.join(save_root_0, tile_name)

                save_file(patch, tile_path)
                name_dir_3840[tile_path + '-' + str(resize_ratio)] = img
                if patient_folder is True:
                    save_root_patient_0 = os.path.join(save_root_0 + '-patient', img_name)
                    save_file(patch, os.path.join(save_root_patient_0, tile_name))

                # The smaller patches are cut from the tile file just written.
                cut_to_patch(tile_path + '.jpg', save_root,
                             patch_size[1], patch_size[2], patch_size[3],
                             img_name, class_name,
                             name_dir_0, name_dir_1, name_dir_2,
                             patient_folder=patient_folder,
                             L=L, M=M, S=S)

        pd.DataFrame.from_dict(name_dir_3840, orient='index', columns=['origin path']).to_csv(
            os.path.join(size_root, class_name + '-' + str(top_size) + '.csv')
        )

    except Exception as e:
        # Best effort: one unreadable slide must not stop the whole run, but say
        # which slide failed.
        print('failed to process', img, ':', repr(e))
    finally:
        # Release the slide handle whether or not tiling succeeded.
        slide.close()
|
|
|
|
|
def _write_name_table(name_dir, save_root, class_name, size):
    """Write one ``patch path -> source image`` table to ``<save_root>/<size>/<class_name>-<size>.csv``."""
    table_dir = os.path.join(save_root, str(size))
    # The folder does not exist yet when no patch of this size was saved (for
    # example with the level switched off); to_csv would then fail.
    os.makedirs(table_dir, exist_ok=True)
    pd.DataFrame.from_dict(name_dir, orient='index', columns=['origin path']).to_csv(
        os.path.join(table_dir, class_name + '-' + str(size) + '.csv')
    )


def cut_to_patch(img,
                 save_root,
                 patch_size_0, patch_size_1, patch_size_2,
                 img_name, class_name,
                 name_dir_0, name_dir_1, name_dir_2,
                 patient_folder=True,
                 L=True, M=True, S=False
                 ):
    """
    Cut one saved tile into up to three levels of smaller patches, save the
    patches accepted by ``pick_patch`` and rewrite the three CSV name tables.

    :param img: path of the tile image; its file name (up to the first '.')
        must follow the ``<name>-<size>-<x>-<y>`` pattern read by ``make_name``
    :param save_root: root output folder
    :param patch_size_0: size of the large patches, cut from the tile when ``L``
    :param patch_size_1: size of the medium patches, cut from the tile when ``M``
    :param patch_size_2: size of the small patches, cut from every kept medium
        patch when both ``M`` and ``S`` are set
    :param img_name: slide name, used for the per-slide folders
    :param class_name: name used for the output sub-folders and CSV files
    :param name_dir_0: dict updated in place, large patch path -> ``img``
    :param name_dir_1: dict updated in place, medium patch path -> ``img``
    :param name_dir_2: dict updated in place, small patch path -> ``img``
    :param patient_folder: when truthy, patches are also saved per slide
    :param L: enable the large level
    :param M: enable the medium level
    :param S: enable the small level (only inside the medium level)
    :return: None
    """
    current_img_name = os.path.split(img)[1].split('.')[0]
    numpy_img = convert_to_npy(img)

    patch_size_num_0 = patch_size_0[0]
    patch_size_num_1 = patch_size_1[0]
    patch_size_num_2 = patch_size_2[0]

    save_root_0 = os.path.join(save_root, str(patch_size_num_0), class_name + '-' + str(patch_size_num_0))
    save_root_1 = os.path.join(save_root, str(patch_size_num_1), class_name + '-' + str(patch_size_num_1))
    save_root_2 = os.path.join(save_root, str(patch_size_num_2), class_name + '-' + str(patch_size_num_2))

    save_root_patient_0 = os.path.join(save_root_0 + '-patient', img_name)
    save_root_patient_1 = os.path.join(save_root_1 + '-patient', img_name)
    save_root_patient_2 = os.path.join(save_root_2 + '-patient', img_name)

    # NOTE(review): the name tables are updated for every saved patch, with or
    # without patient_folder, as SVS_cut_to_patch does - confirm this matches
    # the intended nesting.
    if L:
        # Patches are only built for enabled levels; make_name expects a
        # 1-based patch index.
        for i, patch in enumerate(to_patch(patch_size_0)(numpy_img), start=1):
            patch = patch.permute(1, 2, 0).numpy()
            if not pick_patch(patch):
                continue
            img_name_0 = make_name(current_img_name, patch_size_num_0, i)
            save_dir_0 = os.path.join(save_root_0, img_name_0)
            print(save_dir_0)
            patch = array2img(patch)

            if patient_folder:
                save_file(patch, os.path.join(save_root_patient_0, img_name_0))
            name_dir_0[save_dir_0] = img
            save_file(patch, save_dir_0)

    if M:
        for j, patch_1 in enumerate(to_patch(patch_size_1)(numpy_img), start=1):
            patch_1 = patch_1.permute(1, 2, 0).numpy()
            if not pick_patch(patch_1):
                continue
            img_name_1 = make_name(current_img_name, patch_size_num_1, j)
            save_dir_1 = os.path.join(save_root_1, img_name_1)
            print(save_dir_1)

            if S:
                # Small patches are cut from the kept medium patch, before the
                # medium patch itself is written.
                for k, patch_2 in enumerate(to_patch(patch_size_2)(patch_1), start=1):
                    patch_2 = patch_2.permute(1, 2, 0).numpy()
                    if not pick_patch(patch_2):
                        continue
                    img_name_2 = make_name(img_name_1, patch_size_num_2, k)
                    patch_2 = array2img(patch_2)
                    save_dir_2 = os.path.join(save_root_2, img_name_2)
                    print(save_dir_2)
                    if patient_folder:
                        save_file(patch_2, os.path.join(save_root_patient_2, img_name_2))
                    name_dir_2[save_dir_2] = img
                    save_file(patch_2, save_dir_2)

            patch_1 = array2img(patch_1)
            if patient_folder:
                save_file(patch_1, os.path.join(save_root_patient_1, img_name_1))
            name_dir_1[save_dir_1] = img
            save_file(patch_1, save_dir_1)

    # The complete tables are rewritten on every call.
    _write_name_table(name_dir_0, save_root, class_name, patch_size_num_0)
    _write_name_table(name_dir_1, save_root, class_name, patch_size_num_1)
    _write_name_table(name_dir_2, save_root, class_name, patch_size_num_2)
|
|
|
|
|
def read_and_convert(data_root, save_root, suffix=None, patient_folder=False, L=True, M=True, S=False,
                     class_names=('PAIP2019',)):
    """
    Run ``SVS_cut_to_patch`` on every matching slide of the selected class
    folders below ``data_root``.

    :param data_root: folder holding one sub-folder per class
    :param save_root: root output folder
    :param suffix: file-name suffix filter handed to ``find_all_files``
    :param patient_folder: forwarded to ``SVS_cut_to_patch``
    :param L: forwarded to ``SVS_cut_to_patch``
    :param M: forwarded to ``SVS_cut_to_patch``
    :param S: forwarded to ``SVS_cut_to_patch``
    :param class_names: sub-folder names to process; the default keeps the
        previously hard-coded ``'PAIP2019'`` selection, ``None`` processes
        every entry of ``data_root``
    :return: None
    :raises TypeError: if ``find_all_files`` rejects ``suffix``
    """
    # Listing first keeps the early failure for a missing data_root.
    available = os.listdir(data_root)
    if class_names is None:
        class_names = available
    elif isinstance(class_names, str):
        class_names = [class_names]

    for class_name in class_names:
        svs_class_root = os.path.join(data_root, class_name)
        svs_all_files = find_all_files(svs_class_root, suffix)
        if not isinstance(svs_all_files, list):
            # find_all_files signals an unsupported suffix type with -1;
            # iterating over it would raise an unrelated-looking TypeError.
            raise TypeError('type of suffix is not legal : ' + str(type(suffix)))

        # One set of name tables per class, shared by all slides of that class.
        name_dir_3840 = {}
        name_dir_0 = {}
        name_dir_1 = {}
        name_dir_2 = {}
        for img in svs_all_files:
            img_name = os.path.split(img)[1].split('.')[0]
            SVS_cut_to_patch(img, save_root, patch_size, img_name, class_name,
                             name_dir_3840, name_dir_0, name_dir_1, name_dir_2,
                             patient_folder, L=L, M=M, S=S)
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: crop the raw slides into the output root.
    read_and_convert(
        data_root=r'I:\Puzzle_Tuning_Datasets\Raw',
        save_root=r'X:\CPIA_WSI_no_sampling_no_rezising',
        suffix='svs',
        patient_folder=False,
        L=True,
        M=True,
        S=False,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|