project-monai's picture
Upload retinalOCT_RPD_segmentation version 0.0.1
b8597df verified
import distutils.util
import glob
import os
import shutil
import cv2
import pandas as pd
from PIL import Image
from pydicom import dcmread
from pydicom.fileset import FileSet
from tqdm import tqdm
from .volReader import VolFile
# Directory that contains this module file (derived from ``__file__``).
script_dir = os.path.dirname(__file__)
class Error(Exception):
    """Root exception type for errors raised by this module."""
# Answers accepted by the overwrite prompt (same vocabulary as the former
# ``distutils.util.strtobool``; distutils was removed from the standard
# library in Python 3.12).
_TRUE_ANSWERS = ("y", "yes", "t", "true", "on", "1")
_FALSE_ANSWERS = ("n", "no", "f", "false", "off", "0")


def _parse_yes_no(answer):
    """Convert a yes/no style answer into a bool.

    Matching is case-insensitive and ignores surrounding whitespace.

    Raises:
        ValueError: If the answer is not a recognised truth value.
    """
    normalized = answer.strip().lower()
    if normalized in _TRUE_ANSWERS:
        return True
    if normalized in _FALSE_ANSWERS:
        return False
    raise ValueError(f"invalid truth value {answer!r}")


def _vol_output_location(fpath, extracted_path):
    """Return ``(output_dir, file_prefix)`` for the frames of one .vol file.

    The scan name is the file's base name without its extension; every
    underscore in it becomes a directory level below ``extracted_path``.
    """
    base_name = fpath.replace("\\", "/").rsplit("/", 1)[-1]
    # splitext removes only the extension. str.strip(".vol") would instead
    # remove any of the characters '.', 'v', 'o', 'l' from BOTH ends of the
    # path and corrupt names such as "scan_vol.vol" or "./scan.vol".
    scan_str = os.path.splitext(base_name)[0]
    output_dir = os.path.join(extracted_path, scan_str.replace("_", "/"))
    return output_dir, os.path.join(output_dir, scan_str + "_oct")


def _extract_dicom_frames(ds, extracted_path, keywords):
    """Save every frame of one loaded DICOM dataset as PNG; return its metadata row."""
    meta = {key: ds.get(key) for key in keywords}
    uid = ds.SOPInstanceUID
    volpath = os.path.join(extracted_path, f"{uid}")
    # Start from a clean directory so frames of a previous run cannot linger.
    # makedirs (without exist_ok) still fails if the old directory could not
    # be removed, exactly like the former os.mkdir call.
    shutil.rmtree(volpath, ignore_errors=True)
    os.makedirs(volpath)
    frames = ds.pixel_array  # fetch the pixel data once, then index per frame
    for i in range(ds.NumberOfFrames):
        fname = os.path.join(volpath, f"{uid}_oct_{i:03d}.png")
        Image.fromarray(frames[i]).save(fname)
    return meta


def extract_files(dirtoextract, extracted_path, input_format):
    """Extract individual image frames from .vol or DICOM files as PNG images.

    The source directory is searched recursively. For ``"vol"`` every
    ``*.vol`` file is opened with ``VolFile`` and rendered into a directory
    derived from its file name. For ``"dicom"`` every ``DICOMDIR`` file set is
    read, each OPT-modality instance is written frame by frame into a
    directory named after its SOPInstanceUID, and a ``basic_meta.csv`` summary
    is written to ``extracted_path``.

    If ``extracted_path`` already exists and is not empty, the user is asked
    on stdin whether to proceed; a negative answer makes the function return
    without extracting anything.

    Args:
        dirtoextract (str): The root directory to search for source files.
        extracted_path (str): The destination directory where the extracted
            PNG images will be saved. It is created when missing.
        input_format (str): The format of the input files. Must be either
            "vol" or "dicom".

    Raises:
        AssertionError: If ``input_format`` is neither "vol" nor "dicom".
            Raised explicitly (instead of via ``assert``) so the check is not
            stripped under ``python -O``; the type is kept for compatibility.
        ValueError: If the answer to the overwrite prompt is not a
            recognised yes/no value.
    """
    if input_format not in ("vol", "dicom"):
        raise AssertionError('Error: input_format must be "vol" or "dicom".')

    if os.path.isdir(extracted_path) and os.listdir(extracted_path):
        val = input(
            f"{extracted_path} exists and is not empty. Files may be overwritten. Proceed with extraction? (Y/N)"
        )
        if not _parse_yes_no(val):
            return

    print(f"Extracting files from {dirtoextract} into {extracted_path}...")
    if input_format == "vol":
        files_to_extract = glob.glob(os.path.join(dirtoextract, "**/*.vol"), recursive=True)
        for fpath in tqdm(files_to_extract):
            vol = VolFile(fpath)
            extractpath, preffix = _vol_output_location(fpath, extracted_path)
            os.makedirs(extractpath, exist_ok=True)
            vol.render_oct_scans(preffix)
    else:
        keywords = ["SOPInstanceUID", "PatientID", "ImageLaterality", "SeriesDate"]
        # The per-instance directories and the CSV both live directly under
        # extracted_path, so it has to exist before anything is written.
        os.makedirs(extracted_path, exist_ok=True)
        list_of_dicts = []
        dirgen = glob.iglob(os.path.join(dirtoextract, "**/DICOMDIR"), recursive=True)
        for dsstr in dirgen:
            fs = FileSet(dcmread(dsstr))
            for fi in tqdm(gen_opt_fs(fs)):
                list_of_dicts.append(_extract_dicom_frames(fi, extracted_path, keywords))
        dfoct = pd.DataFrame(list_of_dicts, columns=keywords)
        dfoct.to_csv(os.path.join(extracted_path, "basic_meta.csv"))
def rpd_data(extracted_path):
    """Build a dataset list from a directory of extracted PNG images.

    The directory is searched recursively for files with a ``.png`` extension
    (any letter case). Each image is read with OpenCV to obtain its size and
    described by one dictionary. The list-of-dicts layout is intended for
    registration with Detectron2's ``DatasetCatalog`` and can be extended
    with ground truth instances for evaluation.

    Args:
        extracted_path (str): The root directory containing the extracted
            .png image files to be included in the dataset.

    Returns:
        list[dict]: One dictionary per image with the keys ``file_name``
        (path using forward slashes), ``height``, ``width`` and ``image_id``
        (the file's base name).

    Raises:
        OSError: If an image file matched by the search cannot be read.
    """
    search_pattern = os.path.join(extracted_path, "**/*.[Pp][Nn][Gg]")
    extracted_files = glob.glob(search_pattern, recursive=True)
    print("Generating dataset of images...")
    dataset = []
    for fn in tqdm(extracted_files):
        im = cv2.imread(fn)
        if im is None:
            # cv2.imread signals an unreadable or corrupt file by returning
            # None instead of raising; fail with the offending path rather
            # than an AttributeError on ``.shape``.
            raise OSError(f"Could not read image file: {fn}")
        fn_adjusted = fn.replace("\\", "/")
        dataset.append(
            {
                "file_name": fn_adjusted,
                "height": im.shape[0],
                "width": im.shape[1],
                "image_id": fn_adjusted.split("/")[-1],
            }
        )
    print(f"Found {len(dataset)} images")
    return dataset
def gen_opt_fs(fs):
    """Lazily load every OPT-modality dataset contained in a DICOM file set.

    The file set is queried for instances whose modality is "OPT"
    (Ophthalmic Tomography); each match is loaded only when the consumer
    asks for it.

    Args:
        fs (pydicom.fileset.FileSet): The pydicom FileSet to search through.

    Yields:
        pydicom.dataset.FileDataset: The loaded dataset of each instance
        with the "OPT" modality found in the FileSet.
    """
    opt_instances = fs.find(Modality="OPT")
    yield from (record.load() for record in opt_instances)