Spaces:

felipekitamura
/

face_deid_ct

Sleeping

File size: 10,530 Bytes

import os
import pydicom
import numpy as np
import cv2
from matplotlib import pyplot as plt
import random
import time
import tqdm
from IPython.core.display import display, HTML

# Determine if we are in a Jupyter notebook
try:
    shell = get_ipython().__class__.__name__
    if shell == 'ZMQInteractiveShell':
        # We are in Jupyter, use tqdm.notebook
        from tqdm.notebook import tqdm
    else:
        raise Exception()
except:
    # We are in a terminal, use standard tqdm
    from tqdm import tqdm

    
FACE_MAX_VALUE = 50
FACE_MIN_VALUE = -125

AIR_THRESHOLD  = -800
KERNEL_SIZE    = 35



def is_dicom(file_path):
    try:
        pydicom.dcmread(file_path)
        return True
    except Exception:
        return False

def get_first_directory(path):
    # Normalize the path to always use Unix-style path separators
    normalized_path = path.replace("\\", "/")
    split_path = normalized_path.split("/")[-1]
    
    return split_path  # Return None if no directories are found

def list_dicom_directories(root_dir):
    dicom_dirs = set()
    
    for root, dirs, files in os.walk(root_dir):
        for file in files:
            file_path = os.path.join(root, file)
            if is_dicom(file_path):
                dicom_dirs.add(root)
                break
                
    return list(dicom_dirs)

def load_scan(path):
    slices = [pydicom.read_file(path + '/' + s) for s in os.listdir(path)]
    slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
    try:
        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    except:
        try:
            slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
        except:
            slice_thickness = 1.0

    for s in slices:
        s.SliceThickness = slice_thickness
        
    return slices

def get_pixels_hu(slices):
    image = np.stack([s.pixel_array for s in slices])
    # Convert to int16 (from sometimes int16), 
    # should be possible as values should always be low enough (<32k)
    image = image.astype(np.int16)

    # Set outside-of-scan pixels to 0
    # The intercept is usually -1024, so air is approximately 0
    image[image == -2000] = 0
    
    # Convert to Hounsfield units (HU)
    for slice_number in range(len(slices)):
        
        intercept = slices[slice_number].RescaleIntercept
        slope = slices[slice_number].RescaleSlope
        
        if slope != 1:
            image[slice_number] = slope * image[slice_number].astype(np.float64)
            image[slice_number] = image[slice_number].astype(np.int16)
            
        image[slice_number] += np.int16(intercept)
    
    return np.array(image, dtype=np.int16)

def binarize_volume(volume, air_hu=AIR_THRESHOLD):
    binary_volume = np.zeros_like(volume, dtype=np.uint8)
    binary_volume[volume <= air_hu] = 1
    return binary_volume

def largest_connected_component(binary_image):
    # Find all connected components and stats
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_image, connectivity=8)

    # Get the index of the largest component, ignoring the background
    # The background is considered as a component by connectedComponentsWithStats and it is usually the first component
    largest_component_index = np.argmax(stats[1:, cv2.CC_STAT_AREA]) + 1

    # Create an image to keep largest component only
    largest_component_image = np.zeros(labels.shape, dtype=np.uint8)
    largest_component_image[labels == largest_component_index] = 1

    return largest_component_image

def get_largest_component_volume(volume):
    # Initialize an empty array to hold the processed volume
    processed_volume = np.empty_like(volume, dtype=np.uint8)
    
    # Iterate over each slice in the volume
    for i in range(volume.shape[0]):
        # Process the slice and store it in the processed volume
        processed_volume[i] = largest_connected_component(volume[i])
        
    return processed_volume



def dilate_volume(volume, kernel_size=KERNEL_SIZE):
    # Create the structuring element (kernel) for dilation
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
    
    # Initialize an empty array to hold the dilated volume
    dilated_volume = np.empty_like(volume)
    
    # Iterate over each slice in the volume
    for i in range(volume.shape[0]):
        # Dilate the slice and store it in the dilated volume
        dilated_volume[i] = cv2.dilate(volume[i].astype(np.uint8), kernel)
        
    return dilated_volume


def apply_mask_and_get_values(image_volume, mask_volume):
    # Apply the mask by multiplying the image volume with the mask volume
    masked_volume = image_volume * mask_volume
    
    # Get all unique values in the masked volume, excluding zero
    unique_values = np.unique(masked_volume)
    unique_values = unique_values[unique_values > FACE_MIN_VALUE]
    unique_values = unique_values[unique_values < FACE_MAX_VALUE]

    # Convert numpy array to a list
    unique_values_list = unique_values.tolist()
    
    return unique_values_list


def apply_random_values_optimized(pixels_hu, dilated_volume, unique_values_list):
    # Initialize new volume as a copy of the original volume
    new_volume = np.copy(pixels_hu)

    # Generate random indices
    random_indices = np.random.choice(len(unique_values_list), size=np.sum(dilated_volume))

    # Select random values from the unique_values_list
    random_values = np.array(unique_values_list)[random_indices]

    # Apply the random values to the locations where dilated_volume equals 1
    new_volume[dilated_volume == 1] = random_values

    return new_volume

def save_new_dicom_files(new_volume, original_dir, out_path, app="_d"):
    # Create a new directory path by appending "_d" to the original directory
    if out_path is None:
        new_dir = original_dir + app
    else:
        new_dir = out_path

    # Create the new directory if it doesn't exist
    if not os.path.exists(new_dir):
        os.makedirs(new_dir)

    # List all DICOM files in the original directory
    dicom_files = [os.path.join(original_dir, f) for f in os.listdir(original_dir) if f.endswith('.dcm')]

    # Sort the dicom_files list by SliceLocation
    dicom_files.sort(key=lambda x: pydicom.dcmread(x).SliceLocation)

    # Loop over each slice of the new volume
    for i in range(new_volume.shape[0]):
        # Get the corresponding original DICOM file
        dicom_file = dicom_files[i]

        # Read the file
        ds = pydicom.dcmread(dicom_file)
        ds.decompress()

        # Revert the slope and intercept operation on the slice
        new_slice = (new_volume[i] - ds.RescaleIntercept) / ds.RescaleSlope

        # Update the pixel data with the data from the new slice
        ds.PixelData = new_slice.astype(np.int16).tobytes()

        # Generate new file name 
        new_file_name = os.path.join(new_dir, f"new_image_{i}.dcm")

        # Save the new DICOM file
        ds.save_as(new_file_name)



def drown_volume(in_path, out_path='deid_ct', replacer='face'):
    """
    Processes DICOM files from the provided directory by binarizing, getting the largest connected component, 
    dilating and applying mask. Then applies random values to the dilated volume based on a unique values list 
    obtained from the masked volume (or air value). The results are saved as new DICOM files in a specified directory.
    
    Parameters:
    in_path (str): The path to the directory containing the input DICOM files.
    out_path (str, optional): The path to the directory where the output DICOM files will be saved. 
                              If not provided, the output files will be saved in the input directory appended by "_d".
    replacer (str, optional): Indicates what kind of pixels are going to be replaced. Default is 'face'.
                              'face': replaces air and face with random values that are found in the skin and subcutaneous fat.
                              'air': replaces air and face with -1000 HU.
                              int: replaces air and face with int HU.
    
    Returns:
    None. The function saves new DICOM files and prints the total elapsed time of the operation.
    """
    start_time = time.time()

    dirs = list_dicom_directories(in_path)
    
    for _d in tqdm(dirs, desc="List of studies"):

        with tqdm(total=8, desc="Processing DICOM Files", leave=False) as pbar:
            # Load the DICOM files
            slices = load_scan(_d)
            pbar.update()

            # Get the pixel values and convert them to Hounsfield Units (HU)
            pixels_hu = get_pixels_hu(slices)
            pbar.update()

            # Apply the binarization function on the HU volume
            binarized_volume = binarize_volume(pixels_hu)
            pbar.update()

            # Get the largest connected component from the binarized volume
            processed_volume = get_largest_component_volume(binarized_volume)
            pbar.update()

            # Dilate the processed volume
            dilated_volume = dilate_volume(processed_volume)
            pbar.update()
            if replacer == 'face':
                # Apply the mask to the original volume and get unique values list
                unique_values_list = apply_mask_and_get_values(pixels_hu, dilated_volume - processed_volume)
            elif replacer == 'air':
                unique_values_list = [0]
            else:
                try:
                    replacer = int(replacer)
                    unique_values_list = [replacer]
                except:
                    print('replacer must be either air, face, or an integer number in Hounsfield units, but ' + str(replacer) + ' was provided.')
                    print('replacing with face')
                    unique_values_list = apply_mask_and_get_values(pixels_hu, dilated_volume - processed_volume)

            pbar.update()

            # Apply random values to the dilated volume based on the unique values list
            new_volume = apply_random_values_optimized(pixels_hu, dilated_volume, unique_values_list)
            pbar.update()

            # Save the new DICOM files
            out_path_n = out_path + "/" + get_first_directory(_d)
            save_new_dicom_files(new_volume, _d, out_path_n)
            pbar.update()

        elapsed_time = time.time() - start_time
        print(f"Total elapsed time: {elapsed_time} seconds")