|
import distutils.util
|
|
import glob
|
|
import os
|
|
import shutil
|
|
|
|
import cv2
|
|
import pandas as pd
|
|
from PIL import Image
|
|
from pydicom import dcmread
|
|
from pydicom.fileset import FileSet
|
|
from tqdm import tqdm
|
|
|
|
from .volReader import VolFile
|
|
|
|
script_dir = os.path.dirname(__file__)
|
|
|
|
|
|
class Error(Exception):
    """Root exception type for errors raised by this module.

    It adds nothing to ``Exception``; it only provides a common base class.
    """
|
|
|
|
|
|
def _parse_yes_no(answer):
    """Convert a yes/no style answer into a bool.

    Recognises the same tokens as ``distutils.util.strtobool`` (which is
    unavailable on Python 3.12+), ignoring case and surrounding whitespace.

    Args:
        answer (str): The raw text typed by the user.

    Returns:
        bool: True for an affirmative answer, False for a negative one.

    Raises:
        ValueError: If the answer is not one of the recognised tokens.
    """
    token = answer.strip().lower()
    if token in ("y", "yes", "t", "true", "on", "1"):
        return True
    if token in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"invalid truth value {token!r}")


def _vol_scan_name(fpath):
    """Return the file name of a path without its directory and extension."""
    # Normalise Windows separators so the split behaves the same everywhere.
    filename = fpath.replace("\\", "/").rsplit("/", 1)[-1]
    # splitext drops only the real extension; str.strip(".vol") would also eat
    # any leading/trailing '.', 'v', 'o' or 'l' characters of the name itself.
    return os.path.splitext(filename)[0]


def extract_files(dirtoextract, extracted_path, input_format):
    """Extracts individual image frames from .vol or DICOM files.

    Searches ``dirtoextract`` recursively for source files of the requested
    format and writes their frames as PNG images below ``extracted_path``.
    If ``extracted_path`` already exists and is not empty, the user is asked
    on stdin whether to proceed; a negative answer leaves everything untouched.

    For "vol" input, every ``*.vol`` file is rendered into a sub-directory
    derived from its file name (underscores become path separators). For
    "dicom" input, every ``DICOMDIR`` file set is scanned for OPT instances,
    each instance is written to a directory named after its SOPInstanceUID,
    and a ``basic_meta.csv`` summary is saved in ``extracted_path``.

    Args:
        dirtoextract (str): The root directory to search for source files.
        extracted_path (str): The destination directory where the extracted
            PNG images will be saved.
        input_format (str): The format of the input files. Must be either
            "vol" or "dicom".

    Raises:
        AssertionError: If ``input_format`` is not "vol" or "dicom".
        ValueError: If the answer to the overwrite prompt is not a
            recognised yes/no value.
    """
    assert input_format in ["vol", "dicom"], 'Error: input_format must be "vol" or "dicom".'

    if os.path.isdir(extracted_path) and os.listdir(extracted_path):
        val = input(
            f"{extracted_path} exists and is not empty. Files may be overwritten. Proceed with extraction? (Y/N)"
        )
        if not _parse_yes_no(val):
            return

    print(f"Extracting files from {dirtoextract} into {extracted_path}...")

    if input_format == "vol":
        files_to_extract = glob.glob(os.path.join(dirtoextract, "**/*.vol"), recursive=True)
        for fpath in tqdm(files_to_extract):
            vol = VolFile(fpath)
            scan_str = _vol_scan_name(fpath)
            # Underscores in the scan name become nested output directories.
            extractpath = os.path.join(extracted_path, scan_str.replace("_", "/"))
            os.makedirs(extractpath, exist_ok=True)
            preffix = os.path.join(extractpath, scan_str + "_oct")
            vol.render_oct_scans(preffix)
    elif input_format == "dicom":
        keywords = ["SOPInstanceUID", "PatientID", "ImageLaterality", "SeriesDate"]
        list_of_dicts = []
        # Make sure the destination exists so the per-instance os.mkdir and the
        # final CSV write cannot fail on a missing parent directory.
        os.makedirs(extracted_path, exist_ok=True)
        dirgen = glob.iglob(os.path.join(dirtoextract, "**/DICOMDIR"), recursive=True)

        for dsstr in dirgen:
            fs = FileSet(dcmread(dsstr))
            for fi in tqdm(gen_opt_fs(fs)):
                dd = {key: fi.get(key) for key in keywords}

                # Start from a clean directory for this instance.
                volpath = os.path.join(extracted_path, f"{fi.SOPInstanceUID}")
                shutil.rmtree(volpath, ignore_errors=True)
                os.mkdir(volpath)

                # Fetch the pixel array once per instance instead of on every frame.
                frames = fi.pixel_array
                for i in range(fi.NumberOfFrames):
                    fname = os.path.join(volpath, f"{fi.SOPInstanceUID}_oct_{i:03d}.png")
                    Image.fromarray(frames[i]).save(fname)
                list_of_dicts.append(dd)

        dfoct = pd.DataFrame(list_of_dicts, columns=keywords)
        dfoct.to_csv(os.path.join(extracted_path, "basic_meta.csv"))
|
|
|
|
|
|
def rpd_data(extracted_path):
    """Generates a dataset list from a directory of extracted image files.

    Scans a directory recursively for PNG images (extension matched
    case-insensitively) and creates one dictionary per readable image. The
    format is designed to be compatible with Detectron2's `DatasetCatalog`
    and can be adapted to hold ground truth instances for evaluation.

    Files that OpenCV cannot decode are reported on stdout and skipped, so a
    single corrupt image does not abort dataset generation.

    Args:
        extracted_path (str): The root directory containing the extracted
            .png image files to be included in the dataset.

    Returns:
        list[dict]: A list where each dictionary has the keys ``file_name``
            (path with forward slashes), ``height``, ``width`` and
            ``image_id`` (the image's file name).
    """
    dataset = []
    extracted_files = glob.glob(os.path.join(extracted_path, "**/*.[Pp][Nn][Gg]"), recursive=True)
    print("Generating dataset of images...")
    for fn in tqdm(extracted_files):
        im = cv2.imread(fn)
        if im is None:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so it has to be checked explicitly.
            print(f"Warning: could not read image {fn}; skipping.")
            continue
        # Store paths with forward slashes regardless of platform.
        fn_adjusted = fn.replace("\\", "/")
        imageid = fn_adjusted.rsplit("/", 1)[-1]
        height, width = im.shape[:2]
        dataset.append(dict(file_name=fn_adjusted, height=height, width=width, image_id=imageid))
    print(f"Found {len(dataset)} images")
    return dataset
|
|
|
|
|
|
def gen_opt_fs(fs):
    """Lazily yield every loaded "OPT" modality dataset of a file set.

    The file set is queried with ``fs.find(Modality="OPT")`` and each matching
    instance is loaded only when the consumer asks for the next item.

    Args:
        fs (pydicom.fileset.FileSet): The pydicom FileSet to search through.

    Yields:
        pydicom.dataset.FileDataset: The result of ``load()`` for each
            instance whose modality is "OPT".
    """
    yield from (opt_instance.load() for opt_instance in fs.find(Modality="OPT"))
|
|
|