"""Helpers for extracting OCT frames from .vol / DICOM files into PNG images.

The module exposes :func:`extract_files` (source files -> PNG frames),
:func:`rpd_data` (PNG frames -> list of dataset dictionaries) and
:func:`gen_opt_fs` (iterate the "OPT" instances of a DICOM File-set).
"""

import glob
import os
import shutil

try:
    # Retained only for backward compatibility: ``distutils`` was removed from
    # the standard library in Python 3.12 (PEP 632), so an unconditional import
    # would make this module unimportable there. Nothing below depends on it.
    import distutils.util  # noqa: F401
except ImportError:
    pass

import cv2
import pandas as pd
from PIL import Image
from pydicom import dcmread
from pydicom.fileset import FileSet
from tqdm import tqdm

from .volReader import VolFile

script_dir = os.path.dirname(__file__)

# Answers accepted by the overwrite prompt (same sets as distutils' strtobool).
_TRUE_ANSWERS = frozenset({"y", "yes", "t", "true", "on", "1"})
_FALSE_ANSWERS = frozenset({"n", "no", "f", "false", "off", "0"})


class Error(Exception):
    """Base class for exceptions in this module."""

    pass


def _strtobool(val):
    """Convert a yes/no style answer into a bool.

    Drop-in replacement for ``distutils.util.strtobool``. Surrounding
    whitespace and letter case are ignored.

    Args:
        val (str): The raw answer typed by the user.

    Returns:
        bool: True for an affirmative answer, False for a negative one.

    Raises:
        ValueError: If ``val`` is not one of the recognised answers.
    """
    normalized = val.strip().lower()
    if normalized in _TRUE_ANSWERS:
        return True
    if normalized in _FALSE_ANSWERS:
        return False
    raise ValueError(f"invalid truth value {val!r}")


def _extract_vol_files(dirtoextract, extracted_path):
    """Render every ``.vol`` file found under ``dirtoextract`` as PNG frames."""
    files_to_extract = glob.glob(os.path.join(dirtoextract, "**/*.vol"), recursive=True)
    for fpath in tqdm(files_to_extract):
        vol = VolFile(fpath)
        # Use forward slashes so the file name can be isolated on any platform.
        filename = fpath.replace("\\", "/").rpartition("/")[2]
        # splitext removes exactly the extension. str.strip(".vol") would
        # instead strip any of the characters '.', 'v', 'o', 'l' from both
        # ends and corrupt names such as "patient_l.vol".
        scan_str = os.path.splitext(filename)[0]
        # Every underscore in the scan name becomes a directory level.
        extractpath = os.path.join(extracted_path, scan_str.replace("_", "/"))
        os.makedirs(extractpath, exist_ok=True)
        prefix = os.path.join(extractpath, scan_str + "_oct")
        vol.render_oct_scans(prefix)


def _extract_dicom_files(dirtoextract, extracted_path):
    """Save the frames of every OPT instance reachable through a DICOMDIR.

    One sub-directory per SOPInstanceUID is created under ``extracted_path``
    and a ``basic_meta.csv`` summary is written next to them.
    """
    keywords = ["SOPInstanceUID", "PatientID", "ImageLaterality", "SeriesDate"]
    list_of_dicts = []
    # The destination may not exist yet; both the per-volume directories and
    # the CSV written at the end need it.
    os.makedirs(extracted_path, exist_ok=True)
    dirgen = glob.iglob(os.path.join(dirtoextract, "**/DICOMDIR"), recursive=True)
    for dsstr in dirgen:
        fs = FileSet(dcmread(dsstr))
        for fi in tqdm(gen_opt_fs(fs)):
            # Top-level keywords recorded in the CSV summary.
            meta = {key: fi.get(key) for key in keywords}
            volpath = os.path.join(extracted_path, f"{fi.SOPInstanceUID}")
            # Start from a clean directory. The removal is best-effort, so the
            # directory may legitimately still exist afterwards.
            shutil.rmtree(volpath, ignore_errors=True)
            os.makedirs(volpath, exist_ok=True)
            n_frames = int(fi.get("NumberOfFrames") or 1)
            pixels = fi.pixel_array  # fetched once instead of once per frame
            # pydicom only adds a leading frame axis for multi-frame data; a
            # single-frame array must be wrapped so that frames[0] is the image
            # rather than its first pixel row.
            frames = [pixels] if n_frames == 1 else pixels
            for i in range(n_frames):
                fname = os.path.join(volpath, f"{fi.SOPInstanceUID}_oct_{i:03d}.png")
                Image.fromarray(frames[i]).save(fname)
            list_of_dicts.append(meta)
    dfoct = pd.DataFrame(list_of_dicts, columns=keywords)
    dfoct.to_csv(os.path.join(extracted_path, "basic_meta.csv"))


def extract_files(dirtoextract, extracted_path, input_format):
    """Extract individual image frames from .vol or DICOM files.

    Scans ``dirtoextract`` recursively for source files of the requested
    format and writes their frames as PNG images below ``extracted_path``.
    If ``extracted_path`` already exists and is not empty, the user is asked
    on the console whether to proceed; answering no makes the function return
    without extracting anything.

    Args:
        dirtoextract (str): The root directory to search for source files.
        extracted_path (str): The destination directory where the extracted
            PNG images will be saved.
        input_format (str): The format of the input files. Must be either
            "vol" or "dicom".

    Raises:
        AssertionError: If ``input_format`` is neither "vol" nor "dicom".
        ValueError: If the answer to the overwrite prompt is not a recognised
            yes/no value.
    """
    # Kept as an assert so callers observe the same exception type as before.
    assert input_format in ["vol", "dicom"], 'Error: input_format must be "vol" or "dicom".'

    if os.path.isdir(extracted_path) and os.listdir(extracted_path):
        val = input(
            f"{extracted_path} exists and is not empty. Files may be overwritten. "
            "Proceed with extraction? (Y/N)"
        )
        if not _strtobool(val):
            return

    print(f"Extracting files from {dirtoextract} into {extracted_path}...")
    if input_format == "vol":
        _extract_vol_files(dirtoextract, extracted_path)
    elif input_format == "dicom":
        _extract_dicom_files(dirtoextract, extracted_path)


def rpd_data(extracted_path):
    """Generate a dataset list from a directory of extracted image files.

    Scans ``extracted_path`` recursively for PNG images (any letter case of
    the extension) and builds one dictionary per readable image. Files that
    OpenCV cannot decode are left out and reported on the console.

    Args:
        extracted_path (str): The root directory containing the extracted
            .png image files to be included in the dataset.

    Returns:
        list[dict]: One dictionary per image with the keys ``file_name``
            (path using forward slashes), ``height``, ``width`` and
            ``image_id`` (the file name without its directory).
    """
    dataset = []
    unreadable = []
    extracted_files = glob.glob(os.path.join(extracted_path, "**/*.[Pp][Nn][Gg]"), recursive=True)
    print("Generating dataset of images...")
    for fn in tqdm(extracted_files):
        im = cv2.imread(fn)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            unreadable.append(fn)
            continue
        fn_adjusted = fn.replace("\\", "/")
        imageid = fn_adjusted.rsplit("/", 1)[-1]
        dataset.append(
            dict(file_name=fn_adjusted, height=im.shape[0], width=im.shape[1], image_id=imageid)
        )
    if unreadable:
        print(f"Warning: skipped {len(unreadable)} unreadable image(s): {unreadable}")
    print(f"Found {len(dataset)} images")
    return dataset


def gen_opt_fs(fs):
    """Yield every instance of a File-set whose modality is "OPT".

    Args:
        fs (pydicom.fileset.FileSet): The pydicom FileSet to search through.

    Yields:
        The object returned by ``load()`` for each instance that
        ``fs.find(Modality="OPT")`` reports.
    """
    for instance in fs.find(Modality="OPT"):
        yield instance.load()