Chris Xiao commited on
Commit
c642393
·
1 Parent(s): 1b45c44

upload files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. LICENSE +201 -0
  2. README.md +252 -3
  3. assets/method.png +0 -0
  4. metrics/distanceVertex2Mesh.py +64 -0
  5. metrics/get_probability_map.py +194 -0
  6. metrics/lookup_tables.py +463 -0
  7. metrics/metrics.py +355 -0
  8. metrics/surface_distance.py +424 -0
  9. nnunet/__init__.py +7 -0
  10. nnunet/configuration.py +5 -0
  11. nnunet/dataset_conversion/Task017_BeyondCranialVaultAbdominalOrganSegmentation.py +94 -0
  12. nnunet/dataset_conversion/Task024_Promise2012.py +81 -0
  13. nnunet/dataset_conversion/Task027_AutomaticCardiacDetectionChallenge.py +106 -0
  14. nnunet/dataset_conversion/Task029_LiverTumorSegmentationChallenge.py +123 -0
  15. nnunet/dataset_conversion/Task032_BraTS_2018.py +176 -0
  16. nnunet/dataset_conversion/Task035_ISBI_MSLesionSegmentationChallenge.py +162 -0
  17. nnunet/dataset_conversion/Task037_038_Chaos_Challenge.py +460 -0
  18. nnunet/dataset_conversion/Task040_KiTS.py +240 -0
  19. nnunet/dataset_conversion/Task043_BraTS_2019.py +164 -0
  20. nnunet/dataset_conversion/Task055_SegTHOR.py +98 -0
  21. nnunet/dataset_conversion/Task056_VerSe2019.py +274 -0
  22. nnunet/dataset_conversion/Task056_Verse_normalize_orientation.py +98 -0
  23. nnunet/dataset_conversion/Task058_ISBI_EM_SEG.py +105 -0
  24. nnunet/dataset_conversion/Task059_EPFL_EM_MITO_SEG.py +99 -0
  25. nnunet/dataset_conversion/Task061_CREMI.py +146 -0
  26. nnunet/dataset_conversion/Task062_NIHPancreas.py +89 -0
  27. nnunet/dataset_conversion/Task064_KiTS_labelsFixed.py +84 -0
  28. nnunet/dataset_conversion/Task065_KiTS_NicksLabels.py +87 -0
  29. nnunet/dataset_conversion/Task069_CovidSeg.py +68 -0
  30. nnunet/dataset_conversion/Task075_Fluo_C3DH_A549_ManAndSim.py +137 -0
  31. nnunet/dataset_conversion/Task076_Fluo_N3DH_SIM.py +312 -0
  32. nnunet/dataset_conversion/Task082_BraTS_2020.py +751 -0
  33. nnunet/dataset_conversion/Task083_VerSe2020.py +138 -0
  34. nnunet/dataset_conversion/Task089_Fluo-N2DH-SIM.py +290 -0
  35. nnunet/dataset_conversion/Task114_heart_MNMs.py +262 -0
  36. nnunet/dataset_conversion/Task115_COVIDSegChallenge.py +344 -0
  37. nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py +103 -0
  38. nnunet/dataset_conversion/Task135_KiTS2021.py +49 -0
  39. nnunet/dataset_conversion/Task154_RibFrac_multi_label.py +172 -0
  40. nnunet/dataset_conversion/Task155_RibFrac_binary.py +174 -0
  41. nnunet/dataset_conversion/Task156_RibSeg.py +140 -0
  42. nnunet/dataset_conversion/Task159_MyoPS2020.py +106 -0
  43. nnunet/dataset_conversion/__init__.py +3 -0
  44. nnunet/dataset_conversion/utils.py +76 -0
  45. nnunet/evaluation/__init__.py +2 -0
  46. nnunet/evaluation/add_dummy_task_with_mean_over_all_tasks.py +77 -0
  47. nnunet/evaluation/add_mean_dice_to_json.py +51 -0
  48. nnunet/evaluation/collect_results_files.py +48 -0
  49. nnunet/evaluation/evaluator.py +483 -0
  50. nnunet/evaluation/metrics.py +406 -0
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,3 +1,252 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <h2 align="center"> [OTO–HNS2024] A Deep Learning Framework for Analysis of the Eustachian Tube and the Internal Carotid Artery </h2>
2
+ <p align="center">
3
+ <a href="https://aao-hnsfjournals.onlinelibrary.wiley.com/doi/10.1002/ohn.789"><img src="https://img.shields.io/badge/Wiley-Paper-red"></a>
4
+ <a href="https://pubmed.ncbi.nlm.nih.gov/38686594/"><img src="https://img.shields.io/badge/PubMed-Link-blue"></a>
5
+ <a href="https://github.com/mikami520/AutoSeg4ETICA"><img src="https://img.shields.io/badge/Code-Page-magenta"></a>
6
+ </p>
7
+ <h5 align="center"><em>Ameen Amanian, Aseem Jain, Yuliang Xiao, Chanha Kim, Andy S. Ding, Manish Sahu, Russell Taylor, Mathias Unberath, Bryan K. Ward, Deepa Galaiya, Masaru Ishii, Francis X. Creighton</em></h5>
8
+ <p align="center">
9
+ <a href="#news">News</a> |
10
+ <a href="#abstract">Abstract</a> |
11
+ <a href="#installation">Installation</a> |
12
+ <a href="#train">Train</a> |
13
+ <a href="#inference">Inference</a> |
14
+ <a href="#evaluation">Evaluation</a>
15
+ </p>
16
+
17
+ ## News
18
+
19
+ **2024.04.30** - The data preprocessing, training, inference, and evaluation code are released.
20
+
21
+ **2024.04.05** - Our paper is accepted to **American Academy of Otolaryngology–Head and Neck Surgery 2024 (OTO-HNS2024)**.
22
+
23
+ ## Abstract
24
+ - Objective: Obtaining automated, objective 3-dimensional (3D)
25
+ models of the Eustachian tube (ET) and the internal carotid
26
+ artery (ICA) from computed tomography (CT) scans could
27
+ provide useful navigational and diagnostic information for ET
28
+ pathologies and interventions. We aim to develop a deep
29
+ learning (DL) pipeline to automatically segment the ET and
30
+ ICA and use these segmentations to compute distances
31
+ between these structures.
32
+
33
+ - Methods: From a database of 30 CT scans, 60 ET and ICA pairs
34
+ were manually segmented and used to train an nnU-Net model,
35
+ a DL segmentation framework. These segmentations were also
36
+ used to develop a quantitative tool to capture the magnitude
37
+ and location of the minimum distance point (MDP) between ET
38
+ and ICA. Performance metrics for the nnU-Net automated
39
+ segmentations were calculated via the average Hausdorff
40
+ distance (AHD) and dice similarity coefficient (DSC).
41
+
42
+ - Results: The AHD for the ET and ICA were 0.922 and 0.246 mm,
43
+ respectively. Similarly, the DSC values for the ET and ICA were
44
+ 0.578 and 0.884. The mean MDP from ET to ICA in the
45
+ cartilaginous region was 2.6 mm (0.7-5.3 mm) and was located
46
+ on average 1.9 mm caudal from the bony cartilaginous junction.
47
+
48
+ - Conclusion: This study describes the first end-to-end DL
49
+ pipeline for automated ET and ICA segmentation and analyzes
50
+ distances between these structures. In addition to helping to
51
+ ensure the safe selection of patients for ET dilation, this
52
+ method can facilitate large-scale studies exploring the
53
+ relationship between ET pathologies and the 3D shape of
54
+ the ET.
55
+
56
+ <p align="center">
57
+ <img src="assets/method.png" />
58
+ <b>Figure 1: Overview of Workflow
59
+ </b>
60
+ </p>
61
+
62
+ ## Installation
63
+
64
+ ### Step 1: Fork This GitHub Repository
65
+
66
+ ```bash
67
+ git clone https://github.com/mikami520/AutoSeg4ETICA.git && cd AutoSeg4ETICA
68
+ ```
69
+
70
+ ### Step 2: Set Up Two Environments Using requirements.txt Files (virtual environment is recommended)
71
+
72
+ ```bash
73
+ pip install -r requirements.txt
74
+ source /path/to/VIRTUAL_ENVIRONMENT/bin/activate
75
+ ```
76
+
77
+ ## Preprocessing
78
+
79
+ ### Step 1: Register Data to Template
80
+
81
+ ```bash
82
+ cd <path to repo>/preprocessing
83
+ ```
84
+
85
+ Register data to template (can be used for multiple segmentations propagation)
86
+
87
+ ```bash
88
+ python registration.py -bp <full path of base dir> -ip <relative path to nifti images dir> -sp <relative path to segmentations dir>
89
+ ```
90
+
91
+ If you want to make sure correspondence of the name and value of segmentations, you can add the following commands after above command
92
+
93
+ ```bash
94
+ -sl LabelValue1 LabelName1 LabelValue2 LabelName2 LabelValue3 LabelName3 ...
95
+ ```
96
+
97
+ For example, if I have two labels for maxillary sinus named L-MS and R-MS
98
+
99
+ ```bash
100
+ python registration.py -bp /Users/mikamixiao/Desktop -ip images -sp labels -sl 1 L-MS 2 R-MS
101
+ ```
102
+
103
+ Final output of registered images and segmentations will be saved in
104
+
105
+ ```text
106
+ imagesRS/ && labelsRS/
107
+ ```
108
+
109
+ ### Step 2: Create Datasplit for Training/Testing. Validation will be chosen automatically by nnUNet (filename format should be taskname_xxx.nii.gz)
110
+
111
+ ```bash
112
+ python split_data.py -bp <full path of base dir> -ip <relative path to nifti images dir (imagesRS)> -sp <relative path to nifti segmentations dir (labelsRS)> -sl <a list of label name and corresponding label value> -ti <task id for nnUNet preprocessing> -tn <name of task>
113
+ ```
114
+
115
+ For example
116
+
117
+ ```bash
118
+ python split_data.py -bp /Users/mikamixiao/Desktop -ip imagesRS -sp labelsRS -sl 1 L-MS 2 R-MS -ti 001 -tn Sinus
119
+ ```
120
+
121
+ ### Step 3: Setup Bashrc
122
+
123
+ Edit your `~/.bashrc` file with `gedit ~/.bashrc` or `nano ~/.bashrc`. At the end of the file, add the following lines:
124
+
125
+ ```bash
126
+ export nnUNet_raw_data_base="<ABSOLUTE PATH TO BASE_DIR>/nnUnet/nnUNet_raw_data_base"
127
+ export nnUNet_preprocessed="<ABSOLUTE PATH TO BASE_DIR>/nnUNet_preprocessed"
128
+ export RESULTS_FOLDER="<ABSOLUTE PATH TO BASE_DIR>/nnUnet/nnUNet_trained_models"
129
+ ```
130
+
131
+ After updating this you will need to source your `~/.bashrc` file.
132
+
133
+ ```bash
134
+ source ~/.bashrc
135
+ ```
136
+
137
+ This will deactivate your current conda environment.
138
+
139
+ ### Step 4: Verify and Preprocess Data
140
+
141
+ Activate nnUNet environment
142
+
143
+ ```bash
144
+ source /path/to/VIRTUAL_ENVIRONMENT/bin/activate
145
+ ```
146
+
147
+ Run nnUNet preprocessing script.
148
+
149
+ ```bash
150
+ nnUNet_plan_and_preprocess -t <task_id> --verify_dataset_integrity
151
+ ```
152
+
153
+ Potential Error: You may need to edit the dataset.json file so that the labels are sequential. If you have at least 10 labels, then labels `10, 11, 12,...` will be arranged before labels `2, 3, 4, ...`. Doing this in a text editor is completely fine!
154
+
155
+ ## Train
156
+
157
+ To train the model:
158
+
159
+ ```bash
160
+ nnUNet_train 3d_fullres nnUNetTrainerV2 Task<task_num>_TemporalBone Y --npz
161
+ ```
162
+
163
+ `Y` refers to the number of folds for cross-validation. If `Y` is set to `all` then all of the data will be used for training. If you want to try 5-folds cross validation, you should define Y as `0, 1, 2, 3, 4 ` for five times.
164
+
165
+ `--npz` makes the models save the softmax outputs (uncompressed, large files) during the final validation. It should only be used if you are training multiple configurations, which requires `nnUNet_find_best_configuration` to find the best model. We omit this by default.
166
+
167
+ ## Inference
168
+
169
+ To run inference on trained checkpoints and obtain evaluation results:
170
+ `nnUNet_find_best_configuration` will print a string to the terminal with the inference commands you need to use.
171
+ The easiest way to run inference is to simply use these commands.
172
+
173
+ If you wish to manually specify the configuration(s) used for inference, use the following commands:
174
+
175
+ For each of the desired configurations, run:
176
+
177
+ ```bash
178
+ nnUNet_predict -i INPUT_FOLDER -o OUTPUT_FOLDER -t TASK_NAME_OR_ID -m CONFIGURATION --save_npz
179
+ ```
180
+
181
+ Only specify `--save_npz` if you intend to use ensembling. `--save_npz` will make the command save the softmax
182
+ probabilities alongside of the predicted segmentation masks requiring a lot of disk space.
183
+
184
+ Please select a separate `OUTPUT_FOLDER` for each configuration!
185
+
186
+ If you wish to run ensembling, you can ensemble the predictions from several configurations with the following command:
187
+
188
+ ```bash
189
+ nnUNet_ensemble -f FOLDER1 FOLDER2 ... -o OUTPUT_FOLDER -pp POSTPROCESSING_FILE
190
+ ```
191
+
192
+ You can specify an arbitrary number of folders, but remember that each folder needs to contain npz files that were
193
+ generated by `nnUNet_predict`. For ensembling you can also specify a file that tells the command how to postprocess.
194
+ These files are created when running `nnUNet_find_best_configuration` and are located in the respective trained model directory `(RESULTS_FOLDER/nnUNet/CONFIGURATION/TaskXXX_MYTASK/TRAINER_CLASS_NAME__PLANS_FILE_IDENTIFIER/postprocessing.json or RESULTS_FOLDER/nnUNet/ensembles/TaskXXX_MYTASK/ensemble_X__Y__Z--X__Y__Z/postprocessing.json)`. You can also choose to not provide a file (simply omit -pp) and nnU-Net will not run postprocessing.
195
+
196
+ Note that per default, inference will be done with all available folds. We very strongly recommend you use all 5 folds.
197
+ Thus, all 5 folds must have been trained prior to running inference. The list of available folds nnU-Net found will be
198
+ printed at the start of the inference.
199
+
200
+ ## Evaluation
201
+
202
+ To compute the dice score, average hausdorff distance and weighted hausdorff distance:
203
+
204
+ ```bash
205
+ cd <path to repo>/metrics
206
+ ```
207
+
208
+ Run the metrics.py to output a CSV file that contain the dice score and hausdorff distance for each segmentation:
209
+
210
+ ```bash
211
+ python metrics.py -bp <full path of base dir> -gp <relative path of ground truth dir> -pp <relative path of predicted segmentations dir> -sp <save dir> -vt <Validation type: 'dsc', 'ahd', 'whd'>
212
+ ```
213
+
214
+ Users can choose any combinations of evaluation types among these three choices.
215
+
216
+ ```text
217
+ dsc: Dice Score
218
+ ahd: Average Hausdorff Distance
219
+ whd: Weighted Hausdorff Distance
220
+ ```
221
+
222
+ If choosing ```whd``` and you do not have a probability map, you can use ```get_probability_map.py``` to obtain one. Here is how to use it:
223
+
224
+ ```bash
225
+ python get_probability_map.py -bp <full path of base dir> -pp <relative path of predicted segmentations dir> -rr <ratio to split skeleton> -ps <probability sequences>
226
+ ```
227
+
228
+ Currently, we split the skeleton along the x axis, from the ear end to the nasal end. Please make sure the probability sequences match the split regions. The output probability map, which is a text file, will be stored in ```output/``` under the ```base directory```. Once you obtain the probability map, you can import your customized probability map by adding the following command when using ```metrics.py```:
229
+
230
+ ```bash
231
+ -pm <relative path of probability map>
232
+ ```
233
+
234
+ #### To draw the heat map to see the failing part of prediction:
235
+
236
+ ```bash
237
+ python distanceVertex2Mesh.py -bp <full path of base dir> -gp <relative path of ground truth dir> -pp <relative path of predicted segmentations dir>
238
+ ```
239
+
240
+ Once you get the closest distance (save in ```output/``` under ```base directory```) from prediction to ground truth, you can easily draw the heat map and use the color bar to show the change of differences (```ParaView``` is recommended)
241
+
242
+ ## Citing Paper
243
+
244
+ If you find this paper helpful, please consider citing:
245
+ ```bibtex
246
+ @article{amanian2024deep,
247
+ title={A Deep Learning Framework for Analysis of the Eustachian Tube and the Internal Carotid Artery},
248
+ author={Amanian, Ameen and Jain, Aseem and Xiao, Yuliang and Kim, Chanha and Ding, Andy S and Sahu, Manish and Taylor, Russell and Unberath, Mathias and Ward, Bryan K and Galaiya, Deepa and others},
249
+ journal={Otolaryngology--Head and Neck Surgery},
250
+ publisher={Wiley Online Library}
251
+ }
252
+ ```
assets/method.png ADDED
metrics/distanceVertex2Mesh.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pyvista as pv
3
+ import argparse
4
+ import os
5
+ import glob
6
+ import trimesh
7
+
8
+
9
def parse_command_line():
    """Parse CLI arguments for the distance heat-map tool.

    Returns an argparse.Namespace with:
        bp: absolute path of the base directory
        gp: relative path of the ground-truth model directory
        pp: relative path of the prediction model directory
    """
    print('---'*10)
    print('Parsing Command Line Arguments')
    cli = argparse.ArgumentParser(description='Defacing protocol')
    # (flag, metavar, help) triples keep the three string options symmetric.
    arg_specs = [
        ('-bp', 'base path', "Absolute path of the base directory"),
        ('-gp', 'ground truth path', "Relative path of the ground truth model"),
        ('-pp', 'prediction path', "Relative path of the prediction model"),
    ]
    for flag, meta, msg in arg_specs:
        cli.add_argument(flag, metavar=meta, type=str, help=msg)
    return cli.parse_args()
21
+
22
+
23
def distanceVertex2Mesh(mesh, vertex):
    """Closest-point distance from each query vertex to a PyVista mesh surface.

    PyVista stores ``mesh.faces`` flat as (count, v0, v1, v2) per cell; the
    leading count column is dropped to obtain an (n_faces, 3) connectivity
    array for trimesh. Returns the per-vertex distance array produced by
    ``trimesh.proximity.closest_point``.
    """
    tri_faces = mesh.faces.reshape((mesh.n_faces, 4))[:, 1:]
    surface = trimesh.Trimesh(vertices=mesh.points, faces=tri_faces)
    _, distances, _ = trimesh.proximity.closest_point(surface, vertex)
    return distances
29
+
30
+
31
def main():
    """Compute prediction-to-ground-truth distances for every VTK mesh pair.

    For each ground-truth ``<name>.vtk`` under ``<base>/<gt_path>``, reads the
    matching prediction ``<base>/<pred_path>/<name>.vtk``, attaches the
    per-vertex closest distance to the ground-truth surface as a ``dist``
    point array, and saves the annotated prediction to
    ``<base>/output/<name>/<name>.vtk``.
    """
    args = parse_command_line()
    base = args.bp
    gt_path = args.gp
    pred_path = args.pp

    # exist_ok replaces the old bare `except:` which hid real failures
    # (e.g. permission errors) behind an "already exists" message.
    output_dir = os.path.join(base, 'output')
    os.makedirs(output_dir, exist_ok=True)

    for gt_file in glob.glob(os.path.join(base, gt_path, '*.vtk')):
        # Stem before the first '.'; note a dotted stem like 'a.b.vtk'
        # collapses to 'a', matching the original behavior.
        filename = os.path.basename(gt_file).split('.')[0]
        output_sub_dir = os.path.join(output_dir, filename)
        os.makedirs(output_sub_dir, exist_ok=True)

        gt_mesh = pv.read(gt_file)
        pred_mesh = pv.read(os.path.join(
            base, pred_path, filename + '.vtk'))
        pred_vertices = np.array(pred_mesh.points)
        # Distance from every predicted vertex to the ground-truth surface.
        pred_mesh['dist'] = distanceVertex2Mesh(gt_mesh, pred_vertices)
        pred_mesh.save(os.path.join(output_sub_dir, filename + '.vtk'))
61
+
62
+
63
# Script entry point: run the distance pipeline only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
metrics/get_probability_map.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pyvista as pv
3
+ import argparse
4
+ import os
5
+ import glob
6
+ import skeletor as sk
7
+ import trimesh
8
+ import navis
9
+
10
+
11
def parse_command_line():
    """Parse CLI arguments for probability-map generation.

    Returns an argparse.Namespace with:
        bp: absolute path of the base directory
        gp: relative path of the ground-truth model directory
        pp: relative path of the prediction model directory
        rr: list of integer split ratios for the skeleton
        ps: list of float probabilities, one per split region
    """
    print('---'*10)
    print('Parsing Command Line Arguments')
    # NOTE(review): 'Defacing protocol' looks copy-pasted from another
    # script; confirm the intended description with the author.
    parser = argparse.ArgumentParser(description='Defacing protocol')
    parser.add_argument('-bp', metavar='base path', type=str,
                        help="Absolute path of the base directory")
    parser.add_argument('-gp', metavar='ground truth path', type=str,
                        help="Relative path of the ground truth model")
    parser.add_argument('-pp', metavar='prediction path', type=str,
                        help="Relative path of the prediction model")
    parser.add_argument('-rr', metavar='ratio to split skeleton', type=int, nargs='+',
                        help="Ratio to split the skeleton")
    parser.add_argument('-ps', metavar='probability sequences', type=float, nargs='+',
                        # typo fix: "Proability ... splitted" -> "Probability ... split"
                        help="Probability sequences for each split region")
    argv = parser.parse_args()
    return argv
27
+
28
+
29
def distanceVertex2Path(mesh, skeleton, probability_map):
    """Transfer skeleton probabilities onto mesh vertices by nearest node.

    For every mesh point, finds the nearest skeleton vertex (using the
    module-level ``distance3DV2V`` metric) and copies that vertex's entry
    from ``probability_map``. Returns the per-point probability list, or
    ``np.inf`` after printing a message when the inputs are invalid
    (preserving the original error convention).
    """
    # Guard clauses: each invalid input prints a diagnostic and bails out.
    if len(probability_map) == 0:
        print('empty probability_map !!!')
        return np.inf
    if not mesh.is_all_triangles():
        print('only triangulations is allowed (Faces do not have 3 Vertices)!')
        return np.inf
    if not hasattr(mesh, 'points'):
        print('mesh structure must contain fields vertices and faces!')
        return np.inf
    if not hasattr(skeleton, 'vertices'):
        print('skeleton structure must contain fields vertices !!!')
        return np.inf

    mesh_pts = np.array(mesh.points)
    skel_pts = skeleton.vertices
    n_mesh, mesh_dim = mesh_pts.shape
    n_skel, skel_dim = skel_pts.shape
    if mesh_dim != skel_dim or mesh_dim != 3:
        print('mesh and vertices must be in 3D space!')
        return np.inf

    nearest_dist = np.ones(n_mesh, dtype=np.float64) * np.inf
    picked = []
    # Brute-force nearest-neighbour: closest skeleton vertex per mesh point.
    for i in range(n_mesh):
        best_j = -1
        for j in range(n_skel):
            d = distance3DV2V(mesh_pts[i, :], skel_pts[j, :])
            if d < nearest_dist[i]:
                nearest_dist[i] = d
                best_j = j
        picked.append(probability_map[best_j])

    print("check is finished !!!")
    return picked
74
+
75
+
76
+ def generate_probability_map(skeleton, split_ratio, probability):
77
+ points = skeleton.vertices
78
+ center = skeleton.skeleton.centroid
79
+ x = sorted(points[:, 0])
80
+ left = []
81
+ right = []
82
+ for i in range(len(x)):
83
+ if x[i] < center[0]:
84
+ left.append(x[i])
85
+ else:
86
+ right.append(x[i])
87
+
88
+ right_map = []
89
+ left_map = []
90
+ sec_old = 0
91
+ for j in range(len(split_ratio)):
92
+ if j == len(split_ratio) - 1:
93
+ sec_len = len(left) - sec_old
94
+ else:
95
+ sec_len = int(round(len(left) * split_ratio[j] / 100))
96
+
97
+ for k in range(sec_old, sec_old + sec_len):
98
+ left_map.append(probability[j])
99
+
100
+ sec_old += sec_len
101
+
102
+ sec_old = 0
103
+ for j in range(len(split_ratio)-1, -1, -1):
104
+ if j == 0:
105
+ sec_len = len(right) - sec_old
106
+ else:
107
+ sec_len = int(round(len(right) * split_ratio[j] / 100))
108
+
109
+ for k in range(sec_old, sec_old + sec_len):
110
+ right_map.append(probability[j])
111
+
112
+ sec_old += sec_len
113
+
114
+ final_map = []
115
+ row = points.shape[0]
116
+ assert len(left) + len(right) == row
117
+ for m in range(row):
118
+ ver_x = points[m, 0]
119
+ if ver_x in left:
120
+ index = left.index(ver_x)
121
+ final_map.append(left_map[index])
122
+ else:
123
+ index = right.index(ver_x)
124
+ final_map.append(right_map[index])
125
+
126
+ return final_map
127
+
128
+
129
+ def skeleton(mesh):
130
+ faces_as_array = mesh.faces.reshape((mesh.n_faces, 4))[:, 1:]
131
+ trmesh = trimesh.Trimesh(mesh.points, faces_as_array)
132
+ fixed = sk.pre.fix_mesh(trmesh, remove_disconnected=5, inplace=False)
133
+ skel = sk.skeletonize.by_wavefront(fixed, waves=1, step_size=1)
134
+ # Create a neuron from your skeleton
135
+ n = navis.TreeNeuron(skel, soma=None)
136
+ # keep only the two longest linear section in your skeleton
137
+ long2 = navis.longest_neurite(n, n=2, from_root=False)
138
+
139
+ # This renumbers nodes
140
+ swc = navis.io.swc_io.make_swc_table(long2)
141
+ # We also need to rename some columns
142
+ swc = swc.rename({'PointNo': 'node_id', 'Parent': 'parent_id', 'X': 'x',
143
+ 'Y': 'y', 'Z': 'z', 'Radius': 'radius'}, axis=1).drop('Label', axis=1)
144
+ # Skeletor excepts node IDs to start with 0, but navis starts at 1 for SWC
145
+ swc['node_id'] -= 1
146
+ swc.loc[swc.parent_id > 0, 'parent_id'] -= 1
147
+ # Create the skeletor.Skeleton
148
+ skel2 = sk.Skeleton(swc)
149
+ return skel2
150
+
151
+
152
+ def distance3DV2V(v1, v2):
153
+ d = np.linalg.norm(v1-v2)
154
+ return d
155
+
156
+
157
+ def main():
158
+ args = parse_command_line()
159
+ base = args.bp
160
+ gt_path = args.gp
161
+ pred_path = args.pp
162
+ area_ratio = args.rr
163
+ prob_sequences = args.ps
164
+ output_dir = os.path.join(base, 'output')
165
+ try:
166
+ os.mkdir(output_dir)
167
+ except:
168
+ print(f'{output_dir} already exists')
169
+
170
+ for i in glob.glob(os.path.join(base, gt_path) + '/*.vtk'):
171
+ scan_name = os.path.basename(i).split('.')[0].split('_')[1]
172
+ scan_id = os.path.basename(i).split('.')[0].split('_')[2]
173
+ output_sub_dir = os.path.join(
174
+ base, 'output', scan_name + '_' + scan_id)
175
+ try:
176
+ os.mkdir(output_sub_dir)
177
+ except:
178
+ print(f'{output_sub_dir} already exists')
179
+
180
+ gt_mesh = pv.read(i)
181
+ pred_mesh = pv.read(os.path.join(
182
+ base, pred_path, 'pred_' + scan_name + '_' + scan_id + '.vtk'))
183
+ pred_skel = skeleton(pred_mesh)
184
+ prob_map = generate_probability_map(
185
+ pred_skel, area_ratio, prob_sequences)
186
+ pm = distanceVertex2Path(pred_mesh, pred_skel, prob_map)
187
+ if(pm == np.Inf):
188
+ print('something with mesh, probability map and skeleton are wrong !!!')
189
+ return
190
+ np.savetxt(os.path.join(base, output_sub_dir, scan_id + '.txt'), pm)
191
+
192
+
193
+ if __name__ == '__main__':
194
+ main()
metrics/lookup_tables.py ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2018 Google Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS-IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from __future__ import absolute_import
15
+ from __future__ import division
16
+ from __future__ import print_function
17
+
18
+ import math
19
+ import numpy as np
20
+ ENCODE_NEIGHBOURHOOD_3D_KERNEL = np.array([[[128, 64], [32, 16]], [[8, 4],
21
+ [2, 1]]])
22
+
23
+ """
24
+
25
+ lookup_tables.py
26
+
27
+ all of the lookup-tables functions are borrowed from DeepMind surface_distance repository
28
+
29
+ """
30
+
31
+
32
+ # _NEIGHBOUR_CODE_TO_NORMALS is a lookup table.
33
+ # For every binary neighbour code
34
+ # (2x2x2 neighbourhood = 8 neighbours = 8 bits = 256 codes)
35
+ # it contains the surface normals of the triangles (called "surfel" for
36
+ # "surface element" in the following). The length of the normal
37
+ # vector encodes the surfel area.
38
+ #
39
+ # created using the marching_cube algorithm
40
+ # see e.g. https://en.wikipedia.org/wiki/Marching_cubes
41
+ # pylint: disable=line-too-long
42
+ _NEIGHBOUR_CODE_TO_NORMALS = [
43
+ [[0, 0, 0]],
44
+ [[0.125, 0.125, 0.125]],
45
+ [[-0.125, -0.125, 0.125]],
46
+ [[-0.25, -0.25, 0.0], [0.25, 0.25, -0.0]],
47
+ [[0.125, -0.125, 0.125]],
48
+ [[-0.25, -0.0, -0.25], [0.25, 0.0, 0.25]],
49
+ [[0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
50
+ [[0.5, 0.0, -0.0], [0.25, 0.25, 0.25], [0.125, 0.125, 0.125]],
51
+ [[-0.125, 0.125, 0.125]],
52
+ [[0.125, 0.125, 0.125], [-0.125, 0.125, 0.125]],
53
+ [[-0.25, 0.0, 0.25], [-0.25, 0.0, 0.25]],
54
+ [[0.5, 0.0, 0.0], [-0.25, -0.25, 0.25], [-0.125, -0.125, 0.125]],
55
+ [[0.25, -0.25, 0.0], [0.25, -0.25, 0.0]],
56
+ [[0.5, 0.0, 0.0], [0.25, -0.25, 0.25], [-0.125, 0.125, -0.125]],
57
+ [[-0.5, 0.0, 0.0], [-0.25, 0.25, 0.25], [-0.125, 0.125, 0.125]],
58
+ [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0]],
59
+ [[0.125, -0.125, -0.125]],
60
+ [[0.0, -0.25, -0.25], [0.0, 0.25, 0.25]],
61
+ [[-0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
62
+ [[0.0, -0.5, 0.0], [0.25, 0.25, 0.25], [0.125, 0.125, 0.125]],
63
+ [[0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
64
+ [[0.0, 0.0, -0.5], [0.25, 0.25, 0.25], [-0.125, -0.125, -0.125]],
65
+ [[-0.125, -0.125, 0.125], [0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
66
+ [[-0.125, -0.125, -0.125], [-0.25, -0.25, -0.25],
67
+ [0.25, 0.25, 0.25], [0.125, 0.125, 0.125]],
68
+ [[-0.125, 0.125, 0.125], [0.125, -0.125, -0.125]],
69
+ [[0.0, -0.25, -0.25], [0.0, 0.25, 0.25], [-0.125, 0.125, 0.125]],
70
+ [[-0.25, 0.0, 0.25], [-0.25, 0.0, 0.25], [0.125, -0.125, -0.125]],
71
+ [[0.125, 0.125, 0.125], [0.375, 0.375, 0.375],
72
+ [0.0, -0.25, 0.25], [-0.25, 0.0, 0.25]],
73
+ [[0.125, -0.125, -0.125], [0.25, -0.25, 0.0], [0.25, -0.25, 0.0]],
74
+ [[0.375, 0.375, 0.375], [0.0, 0.25, -0.25],
75
+ [-0.125, -0.125, -0.125], [-0.25, 0.25, 0.0]],
76
+ [[-0.5, 0.0, 0.0], [-0.125, -0.125, -0.125],
77
+ [-0.25, -0.25, -0.25], [0.125, 0.125, 0.125]],
78
+ [[-0.5, 0.0, 0.0], [-0.125, -0.125, -0.125], [-0.25, -0.25, -0.25]],
79
+ [[0.125, -0.125, 0.125]],
80
+ [[0.125, 0.125, 0.125], [0.125, -0.125, 0.125]],
81
+ [[0.0, -0.25, 0.25], [0.0, 0.25, -0.25]],
82
+ [[0.0, -0.5, 0.0], [0.125, 0.125, -0.125], [0.25, 0.25, -0.25]],
83
+ [[0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
84
+ [[0.125, -0.125, 0.125], [-0.25, -0.0, -0.25], [0.25, 0.0, 0.25]],
85
+ [[0.0, -0.25, 0.25], [0.0, 0.25, -0.25], [0.125, -0.125, 0.125]],
86
+ [[-0.375, -0.375, 0.375], [-0.0, 0.25, 0.25],
87
+ [0.125, 0.125, -0.125], [-0.25, -0.0, -0.25]],
88
+ [[-0.125, 0.125, 0.125], [0.125, -0.125, 0.125]],
89
+ [[0.125, 0.125, 0.125], [0.125, -0.125, 0.125], [-0.125, 0.125, 0.125]],
90
+ [[-0.0, 0.0, 0.5], [-0.25, -0.25, 0.25], [-0.125, -0.125, 0.125]],
91
+ [[0.25, 0.25, -0.25], [0.25, 0.25, -0.25],
92
+ [0.125, 0.125, -0.125], [-0.125, -0.125, 0.125]],
93
+ [[0.125, -0.125, 0.125], [0.25, -0.25, 0.0], [0.25, -0.25, 0.0]],
94
+ [[0.5, 0.0, 0.0], [0.25, -0.25, 0.25],
95
+ [-0.125, 0.125, -0.125], [0.125, -0.125, 0.125]],
96
+ [[0.0, 0.25, -0.25], [0.375, -0.375, -0.375],
97
+ [-0.125, 0.125, 0.125], [0.25, 0.25, 0.0]],
98
+ [[-0.5, 0.0, 0.0], [-0.25, -0.25, 0.25], [-0.125, -0.125, 0.125]],
99
+ [[0.25, -0.25, 0.0], [-0.25, 0.25, 0.0]],
100
+ [[0.0, 0.5, 0.0], [-0.25, 0.25, 0.25], [0.125, -0.125, -0.125]],
101
+ [[0.0, 0.5, 0.0], [0.125, -0.125, 0.125], [-0.25, 0.25, -0.25]],
102
+ [[0.0, 0.5, 0.0], [0.0, -0.5, 0.0]],
103
+ [[0.25, -0.25, 0.0], [-0.25, 0.25, 0.0], [0.125, -0.125, 0.125]],
104
+ [[-0.375, -0.375, -0.375], [-0.25, 0.0, 0.25],
105
+ [-0.125, -0.125, -0.125], [-0.25, 0.25, 0.0]],
106
+ [[0.125, 0.125, 0.125], [0.0, -0.5, 0.0],
107
+ [-0.25, -0.25, -0.25], [-0.125, -0.125, -0.125]],
108
+ [[0.0, -0.5, 0.0], [-0.25, -0.25, -0.25], [-0.125, -0.125, -0.125]],
109
+ [[-0.125, 0.125, 0.125], [0.25, -0.25, 0.0], [-0.25, 0.25, 0.0]],
110
+ [[0.0, 0.5, 0.0], [0.25, 0.25, -0.25],
111
+ [-0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
112
+ [[-0.375, 0.375, -0.375], [-0.25, -0.25, 0.0],
113
+ [-0.125, 0.125, -0.125], [-0.25, 0.0, 0.25]],
114
+ [[0.0, 0.5, 0.0], [0.25, 0.25, -0.25], [-0.125, -0.125, 0.125]],
115
+ [[0.25, -0.25, 0.0], [-0.25, 0.25, 0.0],
116
+ [0.25, -0.25, 0.0], [0.25, -0.25, 0.0]],
117
+ [[-0.25, -0.25, 0.0], [-0.25, -0.25, 0.0], [-0.125, -0.125, 0.125]],
118
+ [[0.125, 0.125, 0.125], [-0.25, -0.25, 0.0], [-0.25, -0.25, 0.0]],
119
+ [[-0.25, -0.25, 0.0], [-0.25, -0.25, 0.0]],
120
+ [[-0.125, -0.125, 0.125]],
121
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125]],
122
+ [[-0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
123
+ [[-0.125, -0.125, 0.125], [-0.25, -0.25, 0.0], [0.25, 0.25, -0.0]],
124
+ [[0.0, -0.25, 0.25], [0.0, -0.25, 0.25]],
125
+ [[0.0, 0.0, 0.5], [0.25, -0.25, 0.25], [0.125, -0.125, 0.125]],
126
+ [[0.0, -0.25, 0.25], [0.0, -0.25, 0.25], [-0.125, -0.125, 0.125]],
127
+ [[0.375, -0.375, 0.375], [0.0, -0.25, -0.25],
128
+ [-0.125, 0.125, -0.125], [0.25, 0.25, 0.0]],
129
+ [[-0.125, -0.125, 0.125], [-0.125, 0.125, 0.125]],
130
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125], [-0.125, 0.125, 0.125]],
131
+ [[-0.125, -0.125, 0.125], [-0.25, 0.0, 0.25], [-0.25, 0.0, 0.25]],
132
+ [[0.5, 0.0, 0.0], [-0.25, -0.25, 0.25],
133
+ [-0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
134
+ [[-0.0, 0.5, 0.0], [-0.25, 0.25, -0.25], [0.125, -0.125, 0.125]],
135
+ [[-0.25, 0.25, -0.25], [-0.25, 0.25, -0.25],
136
+ [-0.125, 0.125, -0.125], [-0.125, 0.125, -0.125]],
137
+ [[-0.25, 0.0, -0.25], [0.375, -0.375, -0.375],
138
+ [0.0, 0.25, -0.25], [-0.125, 0.125, 0.125]],
139
+ [[0.5, 0.0, 0.0], [-0.25, 0.25, -0.25], [0.125, -0.125, 0.125]],
140
+ [[-0.25, 0.0, 0.25], [0.25, 0.0, -0.25]],
141
+ [[-0.0, 0.0, 0.5], [-0.25, 0.25, 0.25], [-0.125, 0.125, 0.125]],
142
+ [[-0.125, -0.125, 0.125], [-0.25, 0.0, 0.25], [0.25, 0.0, -0.25]],
143
+ [[-0.25, -0.0, -0.25], [-0.375, 0.375, 0.375],
144
+ [-0.25, -0.25, 0.0], [-0.125, 0.125, 0.125]],
145
+ [[0.0, 0.0, -0.5], [0.25, 0.25, -0.25], [-0.125, -0.125, 0.125]],
146
+ [[-0.0, 0.0, 0.5], [0.0, 0.0, 0.5]],
147
+ [[0.125, 0.125, 0.125], [0.125, 0.125, 0.125],
148
+ [0.25, 0.25, 0.25], [0.0, 0.0, 0.5]],
149
+ [[0.125, 0.125, 0.125], [0.25, 0.25, 0.25], [0.0, 0.0, 0.5]],
150
+ [[-0.25, 0.0, 0.25], [0.25, 0.0, -0.25], [-0.125, 0.125, 0.125]],
151
+ [[-0.0, 0.0, 0.5], [0.25, -0.25, 0.25],
152
+ [0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
153
+ [[-0.25, 0.0, 0.25], [-0.25, 0.0, 0.25],
154
+ [-0.25, 0.0, 0.25], [0.25, 0.0, -0.25]],
155
+ [[0.125, -0.125, 0.125], [0.25, 0.0, 0.25], [0.25, 0.0, 0.25]],
156
+ [[0.25, 0.0, 0.25], [-0.375, -0.375, 0.375],
157
+ [-0.25, 0.25, 0.0], [-0.125, -0.125, 0.125]],
158
+ [[-0.0, 0.0, 0.5], [0.25, -0.25, 0.25], [0.125, -0.125, 0.125]],
159
+ [[0.125, 0.125, 0.125], [0.25, 0.0, 0.25], [0.25, 0.0, 0.25]],
160
+ [[0.25, 0.0, 0.25], [0.25, 0.0, 0.25]],
161
+ [[-0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
162
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
163
+ [[-0.125, -0.125, 0.125], [0.0, -0.25, 0.25], [0.0, 0.25, -0.25]],
164
+ [[0.0, -0.5, 0.0], [0.125, 0.125, -0.125],
165
+ [0.25, 0.25, -0.25], [-0.125, -0.125, 0.125]],
166
+ [[0.0, -0.25, 0.25], [0.0, -0.25, 0.25], [0.125, -0.125, 0.125]],
167
+ [[0.0, 0.0, 0.5], [0.25, -0.25, 0.25],
168
+ [0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
169
+ [[0.0, -0.25, 0.25], [0.0, -0.25, 0.25],
170
+ [0.0, -0.25, 0.25], [0.0, 0.25, -0.25]],
171
+ [[0.0, 0.25, 0.25], [0.0, 0.25, 0.25], [0.125, -0.125, -0.125]],
172
+ [[-0.125, 0.125, 0.125], [0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
173
+ [[-0.125, 0.125, 0.125], [0.125, -0.125, 0.125],
174
+ [-0.125, -0.125, 0.125], [0.125, 0.125, 0.125]],
175
+ [[-0.0, 0.0, 0.5], [-0.25, -0.25, 0.25],
176
+ [-0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
177
+ [[0.125, 0.125, 0.125], [0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
178
+ [[-0.0, 0.5, 0.0], [-0.25, 0.25, -0.25],
179
+ [0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
180
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
181
+ [[0.0, -0.25, -0.25], [0.0, 0.25, 0.25], [0.125, 0.125, 0.125]],
182
+ [[0.125, 0.125, 0.125], [0.125, -0.125, -0.125]],
183
+ [[0.5, 0.0, -0.0], [0.25, -0.25, -0.25], [0.125, -0.125, -0.125]],
184
+ [[-0.25, 0.25, 0.25], [-0.125, 0.125, 0.125],
185
+ [-0.25, 0.25, 0.25], [0.125, -0.125, -0.125]],
186
+ [[0.375, -0.375, 0.375], [0.0, 0.25, 0.25],
187
+ [-0.125, 0.125, -0.125], [-0.25, 0.0, 0.25]],
188
+ [[0.0, -0.5, 0.0], [-0.25, 0.25, 0.25], [-0.125, 0.125, 0.125]],
189
+ [[-0.375, -0.375, 0.375], [0.25, -0.25, 0.0],
190
+ [0.0, 0.25, 0.25], [-0.125, -0.125, 0.125]],
191
+ [[-0.125, 0.125, 0.125], [-0.25, 0.25, 0.25], [0.0, 0.0, 0.5]],
192
+ [[0.125, 0.125, 0.125], [0.0, 0.25, 0.25], [0.0, 0.25, 0.25]],
193
+ [[0.0, 0.25, 0.25], [0.0, 0.25, 0.25]],
194
+ [[0.5, 0.0, -0.0], [0.25, 0.25, 0.25],
195
+ [0.125, 0.125, 0.125], [0.125, 0.125, 0.125]],
196
+ [[0.125, -0.125, 0.125], [-0.125, -0.125, 0.125], [0.125, 0.125, 0.125]],
197
+ [[-0.25, -0.0, -0.25], [0.25, 0.0, 0.25], [0.125, 0.125, 0.125]],
198
+ [[0.125, 0.125, 0.125], [0.125, -0.125, 0.125]],
199
+ [[-0.25, -0.25, 0.0], [0.25, 0.25, -0.0], [0.125, 0.125, 0.125]],
200
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125]],
201
+ [[0.125, 0.125, 0.125], [0.125, 0.125, 0.125]],
202
+ [[0.125, 0.125, 0.125]],
203
+ [[0.125, 0.125, 0.125]],
204
+ [[0.125, 0.125, 0.125], [0.125, 0.125, 0.125]],
205
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125]],
206
+ [[-0.25, -0.25, 0.0], [0.25, 0.25, -0.0], [0.125, 0.125, 0.125]],
207
+ [[0.125, 0.125, 0.125], [0.125, -0.125, 0.125]],
208
+ [[-0.25, -0.0, -0.25], [0.25, 0.0, 0.25], [0.125, 0.125, 0.125]],
209
+ [[0.125, -0.125, 0.125], [-0.125, -0.125, 0.125], [0.125, 0.125, 0.125]],
210
+ [[0.5, 0.0, -0.0], [0.25, 0.25, 0.25],
211
+ [0.125, 0.125, 0.125], [0.125, 0.125, 0.125]],
212
+ [[0.0, 0.25, 0.25], [0.0, 0.25, 0.25]],
213
+ [[0.125, 0.125, 0.125], [0.0, 0.25, 0.25], [0.0, 0.25, 0.25]],
214
+ [[-0.125, 0.125, 0.125], [-0.25, 0.25, 0.25], [0.0, 0.0, 0.5]],
215
+ [[-0.375, -0.375, 0.375], [0.25, -0.25, 0.0],
216
+ [0.0, 0.25, 0.25], [-0.125, -0.125, 0.125]],
217
+ [[0.0, -0.5, 0.0], [-0.25, 0.25, 0.25], [-0.125, 0.125, 0.125]],
218
+ [[0.375, -0.375, 0.375], [0.0, 0.25, 0.25],
219
+ [-0.125, 0.125, -0.125], [-0.25, 0.0, 0.25]],
220
+ [[-0.25, 0.25, 0.25], [-0.125, 0.125, 0.125],
221
+ [-0.25, 0.25, 0.25], [0.125, -0.125, -0.125]],
222
+ [[0.5, 0.0, -0.0], [0.25, -0.25, -0.25], [0.125, -0.125, -0.125]],
223
+ [[0.125, 0.125, 0.125], [0.125, -0.125, -0.125]],
224
+ [[0.0, -0.25, -0.25], [0.0, 0.25, 0.25], [0.125, 0.125, 0.125]],
225
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
226
+ [[-0.0, 0.5, 0.0], [-0.25, 0.25, -0.25],
227
+ [0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
228
+ [[0.125, 0.125, 0.125], [0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
229
+ [[-0.0, 0.0, 0.5], [-0.25, -0.25, 0.25],
230
+ [-0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
231
+ [[-0.125, 0.125, 0.125], [0.125, -0.125, 0.125],
232
+ [-0.125, -0.125, 0.125], [0.125, 0.125, 0.125]],
233
+ [[-0.125, 0.125, 0.125], [0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
234
+ [[0.0, 0.25, 0.25], [0.0, 0.25, 0.25], [0.125, -0.125, -0.125]],
235
+ [[0.0, -0.25, -0.25], [0.0, 0.25, 0.25], [0.0, 0.25, 0.25], [0.0, 0.25, 0.25]],
236
+ [[0.0, 0.0, 0.5], [0.25, -0.25, 0.25],
237
+ [0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
238
+ [[0.0, -0.25, 0.25], [0.0, -0.25, 0.25], [0.125, -0.125, 0.125]],
239
+ [[0.0, -0.5, 0.0], [0.125, 0.125, -0.125],
240
+ [0.25, 0.25, -0.25], [-0.125, -0.125, 0.125]],
241
+ [[-0.125, -0.125, 0.125], [0.0, -0.25, 0.25], [0.0, 0.25, -0.25]],
242
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
243
+ [[-0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
244
+ [[0.25, 0.0, 0.25], [0.25, 0.0, 0.25]],
245
+ [[0.125, 0.125, 0.125], [0.25, 0.0, 0.25], [0.25, 0.0, 0.25]],
246
+ [[-0.0, 0.0, 0.5], [0.25, -0.25, 0.25], [0.125, -0.125, 0.125]],
247
+ [[0.25, 0.0, 0.25], [-0.375, -0.375, 0.375],
248
+ [-0.25, 0.25, 0.0], [-0.125, -0.125, 0.125]],
249
+ [[0.125, -0.125, 0.125], [0.25, 0.0, 0.25], [0.25, 0.0, 0.25]],
250
+ [[-0.25, -0.0, -0.25], [0.25, 0.0, 0.25],
251
+ [0.25, 0.0, 0.25], [0.25, 0.0, 0.25]],
252
+ [[-0.0, 0.0, 0.5], [0.25, -0.25, 0.25],
253
+ [0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
254
+ [[-0.25, 0.0, 0.25], [0.25, 0.0, -0.25], [-0.125, 0.125, 0.125]],
255
+ [[0.125, 0.125, 0.125], [0.25, 0.25, 0.25], [0.0, 0.0, 0.5]],
256
+ [[0.125, 0.125, 0.125], [0.125, 0.125, 0.125],
257
+ [0.25, 0.25, 0.25], [0.0, 0.0, 0.5]],
258
+ [[-0.0, 0.0, 0.5], [0.0, 0.0, 0.5]],
259
+ [[0.0, 0.0, -0.5], [0.25, 0.25, -0.25], [-0.125, -0.125, 0.125]],
260
+ [[-0.25, -0.0, -0.25], [-0.375, 0.375, 0.375],
261
+ [-0.25, -0.25, 0.0], [-0.125, 0.125, 0.125]],
262
+ [[-0.125, -0.125, 0.125], [-0.25, 0.0, 0.25], [0.25, 0.0, -0.25]],
263
+ [[-0.0, 0.0, 0.5], [-0.25, 0.25, 0.25], [-0.125, 0.125, 0.125]],
264
+ [[-0.25, 0.0, 0.25], [0.25, 0.0, -0.25]],
265
+ [[0.5, 0.0, 0.0], [-0.25, 0.25, -0.25], [0.125, -0.125, 0.125]],
266
+ [[-0.25, 0.0, -0.25], [0.375, -0.375, -0.375],
267
+ [0.0, 0.25, -0.25], [-0.125, 0.125, 0.125]],
268
+ [[-0.25, 0.25, -0.25], [-0.25, 0.25, -0.25],
269
+ [-0.125, 0.125, -0.125], [-0.125, 0.125, -0.125]],
270
+ [[-0.0, 0.5, 0.0], [-0.25, 0.25, -0.25], [0.125, -0.125, 0.125]],
271
+ [[0.5, 0.0, 0.0], [-0.25, -0.25, 0.25],
272
+ [-0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
273
+ [[-0.125, -0.125, 0.125], [-0.25, 0.0, 0.25], [-0.25, 0.0, 0.25]],
274
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125], [-0.125, 0.125, 0.125]],
275
+ [[-0.125, -0.125, 0.125], [-0.125, 0.125, 0.125]],
276
+ [[0.375, -0.375, 0.375], [0.0, -0.25, -0.25],
277
+ [-0.125, 0.125, -0.125], [0.25, 0.25, 0.0]],
278
+ [[0.0, -0.25, 0.25], [0.0, -0.25, 0.25], [-0.125, -0.125, 0.125]],
279
+ [[0.0, 0.0, 0.5], [0.25, -0.25, 0.25], [0.125, -0.125, 0.125]],
280
+ [[0.0, -0.25, 0.25], [0.0, -0.25, 0.25]],
281
+ [[-0.125, -0.125, 0.125], [-0.25, -0.25, 0.0], [0.25, 0.25, -0.0]],
282
+ [[-0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
283
+ [[0.125, 0.125, 0.125], [-0.125, -0.125, 0.125]],
284
+ [[-0.125, -0.125, 0.125]],
285
+ [[-0.25, -0.25, 0.0], [-0.25, -0.25, 0.0]],
286
+ [[0.125, 0.125, 0.125], [-0.25, -0.25, 0.0], [-0.25, -0.25, 0.0]],
287
+ [[-0.25, -0.25, 0.0], [-0.25, -0.25, 0.0], [-0.125, -0.125, 0.125]],
288
+ [[-0.25, -0.25, 0.0], [-0.25, -0.25, 0.0],
289
+ [-0.25, -0.25, 0.0], [0.25, 0.25, -0.0]],
290
+ [[0.0, 0.5, 0.0], [0.25, 0.25, -0.25], [-0.125, -0.125, 0.125]],
291
+ [[-0.375, 0.375, -0.375], [-0.25, -0.25, 0.0],
292
+ [-0.125, 0.125, -0.125], [-0.25, 0.0, 0.25]],
293
+ [[0.0, 0.5, 0.0], [0.25, 0.25, -0.25],
294
+ [-0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
295
+ [[-0.125, 0.125, 0.125], [0.25, -0.25, 0.0], [-0.25, 0.25, 0.0]],
296
+ [[0.0, -0.5, 0.0], [-0.25, -0.25, -0.25], [-0.125, -0.125, -0.125]],
297
+ [[0.125, 0.125, 0.125], [0.0, -0.5, 0.0],
298
+ [-0.25, -0.25, -0.25], [-0.125, -0.125, -0.125]],
299
+ [[-0.375, -0.375, -0.375], [-0.25, 0.0, 0.25],
300
+ [-0.125, -0.125, -0.125], [-0.25, 0.25, 0.0]],
301
+ [[0.25, -0.25, 0.0], [-0.25, 0.25, 0.0], [0.125, -0.125, 0.125]],
302
+ [[0.0, 0.5, 0.0], [0.0, -0.5, 0.0]],
303
+ [[0.0, 0.5, 0.0], [0.125, -0.125, 0.125], [-0.25, 0.25, -0.25]],
304
+ [[0.0, 0.5, 0.0], [-0.25, 0.25, 0.25], [0.125, -0.125, -0.125]],
305
+ [[0.25, -0.25, 0.0], [-0.25, 0.25, 0.0]],
306
+ [[-0.5, 0.0, 0.0], [-0.25, -0.25, 0.25], [-0.125, -0.125, 0.125]],
307
+ [[0.0, 0.25, -0.25], [0.375, -0.375, -0.375],
308
+ [-0.125, 0.125, 0.125], [0.25, 0.25, 0.0]],
309
+ [[0.5, 0.0, 0.0], [0.25, -0.25, 0.25],
310
+ [-0.125, 0.125, -0.125], [0.125, -0.125, 0.125]],
311
+ [[0.125, -0.125, 0.125], [0.25, -0.25, 0.0], [0.25, -0.25, 0.0]],
312
+ [[0.25, 0.25, -0.25], [0.25, 0.25, -0.25],
313
+ [0.125, 0.125, -0.125], [-0.125, -0.125, 0.125]],
314
+ [[-0.0, 0.0, 0.5], [-0.25, -0.25, 0.25], [-0.125, -0.125, 0.125]],
315
+ [[0.125, 0.125, 0.125], [0.125, -0.125, 0.125], [-0.125, 0.125, 0.125]],
316
+ [[-0.125, 0.125, 0.125], [0.125, -0.125, 0.125]],
317
+ [[-0.375, -0.375, 0.375], [-0.0, 0.25, 0.25],
318
+ [0.125, 0.125, -0.125], [-0.25, -0.0, -0.25]],
319
+ [[0.0, -0.25, 0.25], [0.0, 0.25, -0.25], [0.125, -0.125, 0.125]],
320
+ [[0.125, -0.125, 0.125], [-0.25, -0.0, -0.25], [0.25, 0.0, 0.25]],
321
+ [[0.125, -0.125, 0.125], [0.125, -0.125, 0.125]],
322
+ [[0.0, -0.5, 0.0], [0.125, 0.125, -0.125], [0.25, 0.25, -0.25]],
323
+ [[0.0, -0.25, 0.25], [0.0, 0.25, -0.25]],
324
+ [[0.125, 0.125, 0.125], [0.125, -0.125, 0.125]],
325
+ [[0.125, -0.125, 0.125]],
326
+ [[-0.5, 0.0, 0.0], [-0.125, -0.125, -0.125], [-0.25, -0.25, -0.25]],
327
+ [[-0.5, 0.0, 0.0], [-0.125, -0.125, -0.125],
328
+ [-0.25, -0.25, -0.25], [0.125, 0.125, 0.125]],
329
+ [[0.375, 0.375, 0.375], [0.0, 0.25, -0.25],
330
+ [-0.125, -0.125, -0.125], [-0.25, 0.25, 0.0]],
331
+ [[0.125, -0.125, -0.125], [0.25, -0.25, 0.0], [0.25, -0.25, 0.0]],
332
+ [[0.125, 0.125, 0.125], [0.375, 0.375, 0.375],
333
+ [0.0, -0.25, 0.25], [-0.25, 0.0, 0.25]],
334
+ [[-0.25, 0.0, 0.25], [-0.25, 0.0, 0.25], [0.125, -0.125, -0.125]],
335
+ [[0.0, -0.25, -0.25], [0.0, 0.25, 0.25], [-0.125, 0.125, 0.125]],
336
+ [[-0.125, 0.125, 0.125], [0.125, -0.125, -0.125]],
337
+ [[-0.125, -0.125, -0.125], [-0.25, -0.25, -0.25],
338
+ [0.25, 0.25, 0.25], [0.125, 0.125, 0.125]],
339
+ [[-0.125, -0.125, 0.125], [0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
340
+ [[0.0, 0.0, -0.5], [0.25, 0.25, 0.25], [-0.125, -0.125, -0.125]],
341
+ [[0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
342
+ [[0.0, -0.5, 0.0], [0.25, 0.25, 0.25], [0.125, 0.125, 0.125]],
343
+ [[-0.125, -0.125, 0.125], [0.125, -0.125, -0.125]],
344
+ [[0.0, -0.25, -0.25], [0.0, 0.25, 0.25]],
345
+ [[0.125, -0.125, -0.125]],
346
+ [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0]],
347
+ [[-0.5, 0.0, 0.0], [-0.25, 0.25, 0.25], [-0.125, 0.125, 0.125]],
348
+ [[0.5, 0.0, 0.0], [0.25, -0.25, 0.25], [-0.125, 0.125, -0.125]],
349
+ [[0.25, -0.25, 0.0], [0.25, -0.25, 0.0]],
350
+ [[0.5, 0.0, 0.0], [-0.25, -0.25, 0.25], [-0.125, -0.125, 0.125]],
351
+ [[-0.25, 0.0, 0.25], [-0.25, 0.0, 0.25]],
352
+ [[0.125, 0.125, 0.125], [-0.125, 0.125, 0.125]],
353
+ [[-0.125, 0.125, 0.125]],
354
+ [[0.5, 0.0, -0.0], [0.25, 0.25, 0.25], [0.125, 0.125, 0.125]],
355
+ [[0.125, -0.125, 0.125], [-0.125, -0.125, 0.125]],
356
+ [[-0.25, -0.0, -0.25], [0.25, 0.0, 0.25]],
357
+ [[0.125, -0.125, 0.125]],
358
+ [[-0.25, -0.25, 0.0], [0.25, 0.25, -0.0]],
359
+ [[-0.125, -0.125, 0.125]],
360
+ [[0.125, 0.125, 0.125]],
361
+ [[0, 0, 0]]]
362
+ # pylint: enable=line-too-long
363
+
364
+
365
+ def create_table_neighbour_code_to_surface_area(spacing_mm):
366
+ """Returns an array mapping neighbourhood code to the surface elements area.
367
+ Note that the normals encode the initial surface area. This function computes
368
+ the area corresponding to the given `spacing_mm`.
369
+ Args:
370
+ spacing_mm: 3-element list-like structure. Voxel spacing in x0, x1 and x2
371
+ direction.
372
+ """
373
+ # compute the area for all 256 possible surface elements
374
+ # (given a 2x2x2 neighbourhood) according to the spacing_mm
375
+ neighbour_code_to_surface_area = np.zeros([256])
376
+ for code in range(256):
377
+ normals = np.array(_NEIGHBOUR_CODE_TO_NORMALS[code])
378
+ sum_area = 0
379
+ for normal_idx in range(normals.shape[0]):
380
+ # normal vector
381
+ n = np.zeros([3])
382
+ n[0] = normals[normal_idx, 0] * spacing_mm[1] * spacing_mm[2]
383
+ n[1] = normals[normal_idx, 1] * spacing_mm[0] * spacing_mm[2]
384
+ n[2] = normals[normal_idx, 2] * spacing_mm[0] * spacing_mm[1]
385
+ area = np.linalg.norm(n)
386
+ sum_area += area
387
+ neighbour_code_to_surface_area[code] = sum_area
388
+
389
+ return neighbour_code_to_surface_area
390
+
391
+
392
+ # In the neighbourhood, points are ordered: top left, top right, bottom left,
393
+ # bottom right.
394
+ ENCODE_NEIGHBOURHOOD_2D_KERNEL = np.array([[8, 4], [2, 1]])
395
+
396
+
397
+ def create_table_neighbour_code_to_contour_length(spacing_mm):
398
+ """Returns an array mapping neighbourhood code to the contour length.
399
+ For the list of possible cases and their figures, see page 38 from:
400
+ https://nccastaff.bournemouth.ac.uk/jmacey/MastersProjects/MSc14/06/thesis.pdf
401
+ In 2D, each point has 4 neighbors. Thus, are 16 configurations. A
402
+ configuration is encoded with '1' meaning "inside the object" and '0' "outside
403
+ the object". The points are ordered: top left, top right, bottom left, bottom
404
+ right.
405
+ The x0 axis is assumed vertical downward, and the x1 axis is horizontal to the
406
+ right:
407
+ (0, 0) --> (0, 1)
408
+ |
409
+ (1, 0)
410
+ Args:
411
+ spacing_mm: 2-element list-like structure. Voxel spacing in x0 and x1
412
+ directions.
413
+ """
414
+ neighbour_code_to_contour_length = np.zeros([16])
415
+
416
+ vertical = spacing_mm[0]
417
+ horizontal = spacing_mm[1]
418
+ diag = 0.5 * math.sqrt(spacing_mm[0]**2 + spacing_mm[1]**2)
419
+ # pyformat: disable
420
+ neighbour_code_to_contour_length[int("00"
421
+ "01", 2)] = diag
422
+
423
+ neighbour_code_to_contour_length[int("00"
424
+ "10", 2)] = diag
425
+
426
+ neighbour_code_to_contour_length[int("00"
427
+ "11", 2)] = horizontal
428
+
429
+ neighbour_code_to_contour_length[int("01"
430
+ "00", 2)] = diag
431
+
432
+ neighbour_code_to_contour_length[int("01"
433
+ "01", 2)] = vertical
434
+
435
+ neighbour_code_to_contour_length[int("01"
436
+ "10", 2)] = 2*diag
437
+
438
+ neighbour_code_to_contour_length[int("01"
439
+ "11", 2)] = diag
440
+
441
+ neighbour_code_to_contour_length[int("10"
442
+ "00", 2)] = diag
443
+
444
+ neighbour_code_to_contour_length[int("10"
445
+ "01", 2)] = 2*diag
446
+
447
+ neighbour_code_to_contour_length[int("10"
448
+ "10", 2)] = vertical
449
+
450
+ neighbour_code_to_contour_length[int("10"
451
+ "11", 2)] = diag
452
+
453
+ neighbour_code_to_contour_length[int("11"
454
+ "00", 2)] = horizontal
455
+
456
+ neighbour_code_to_contour_length[int("11"
457
+ "01", 2)] = diag
458
+
459
+ neighbour_code_to_contour_length[int("11"
460
+ "10", 2)] = diag
461
+ # pyformat: enable
462
+
463
+ return neighbour_code_to_contour_length
metrics/metrics.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import nibabel as nib
3
+ import ants
4
+ import argparse
5
+ import pandas as pd
6
+ import glob
7
+ import os
8
+ import surface_distance
9
+ import nrrd
10
+ import shutil
11
+ import distanceVertex2Mesh
12
+ import textwrap
13
+
14
+
15
+ def parse_command_line():
16
+ print('---'*10)
17
+ print('Parsing Command Line Arguments')
18
+ parser = argparse.ArgumentParser(
19
+ description='Inference evaluation pipeline for image registration-segmentation', formatter_class=argparse.RawTextHelpFormatter)
20
+ parser.add_argument('-bp', metavar='base path', type=str,
21
+ help="Absolute path of the base directory")
22
+ parser.add_argument('-gp', metavar='ground truth path', type=str,
23
+ help="Relative path of the ground truth segmentation directory")
24
+ parser.add_argument('-pp', metavar='predicted path', type=str,
25
+ help="Relative path of predicted segmentation directory")
26
+ parser.add_argument('-sp', metavar='save path', type=str,
27
+ help="Relative path of CSV file directory to save, if not specify, default is base directory")
28
+ parser.add_argument('-vt', metavar='validation type', type=str, nargs='+',
29
+ help=textwrap.dedent('''Validation type:
30
+ dsc: Dice Score
31
+ ahd: Average Hausdorff Distance
32
+ whd: Weighted Hausdorff Distance
33
+ '''))
34
+ parser.add_argument('-pm', metavar='probability map path', type=str,
35
+ help="Relative path of text file directory of probability map")
36
+ parser.add_argument('-fn', metavar='file name', type=str,
37
+ help="name of output file")
38
+ parser.add_argument('-reg', action='store_true',
39
+ help="check if the input files are registration predictions")
40
+ parser.add_argument('-tp', metavar='type of segmentation', type=str,
41
+ help=textwrap.dedent('''Segmentation type:
42
+ ET: Eustachian Tube
43
+ NC: Nasal Cavity
44
+ HT: Head Tumor
45
+ '''))
46
+ parser.add_argument('-sl', metavar='segmentation information list', type=str, nargs='+',
47
+ help='a list of label name and corresponding value')
48
+ parser.add_argument('-cp', metavar='current prefix of filenames', type=str,
49
+ help='current prefix of filenames')
50
+ argv = parser.parse_args()
51
+ return argv
52
+
53
+
54
+ def rename(prefix, filename):
55
+ name = filename.split('.')[0][-3:]
56
+ name = prefix + '_' + name
57
+ return name
58
+
59
def dice_coefficient_and_hausdorff_distance(filename, img_np_pred, img_np_gt, num_classes, spacing, probability_map, dsc, ahd, whd, average_DSC, average_HD):
    """Compute the requested per-label metrics for one prediction / ground-truth pair.

    Args:
        filename: identifier written into the 'File ID' column of the result.
        img_np_pred: predicted labelmap volume (integer labels).
        img_np_gt: ground-truth labelmap volume (integer labels).
        num_classes: number of labels including background (label 0 is skipped).
        spacing: voxel spacing (mm) forwarded to the surface-distance code.
        probability_map: reserved for the (unimplemented) weighted Hausdorff distance.
        dsc: if True, compute the Dice score per label.
        ahd: if True, compute the average Hausdorff distance per label.
        whd: if True, would compute the weighted Hausdorff distance (not implemented).
        average_DSC: per-label Dice accumulator (length num_classes - 1), updated in place.
        average_HD: per-label Hausdorff accumulator (length num_classes - 1), updated in place.

    Returns:
        (df, average_DSC, average_HD): a DataFrame with one row per foreground
        label and the updated accumulators.
    """
    df = pd.DataFrame()
    data_gt, bool_gt = make_one_hot(img_np_gt, num_classes)
    data_pred, bool_pred = make_one_hot(img_np_pred, num_classes)
    for i in range(1, num_classes):  # label 0 is background
        df1 = pd.DataFrame([[filename, i]], columns=[
                           'File ID', 'Label Value'])
        if dsc:
            if data_pred[i].any():
                # Bug fix: the old code could `return np.NaN` here (a bare
                # scalar) while every caller unpacks a 3-tuple; that branch was
                # also unreachable because data_pred[i].any() guarantees
                # volume_sum > 0, so it has been removed.
                volume_sum = data_gt[i].sum() + data_pred[i].sum()
                volume_intersect = (data_gt[i] & data_pred[i]).sum()
                dice = 2 * volume_intersect / volume_sum
            else:
                # Nothing predicted for this label: score it 0.
                dice = 0.0
            df1['Dice Score'] = dice
            average_DSC[i-1] += dice
        if ahd:
            if data_pred[i].any():
                avd = average_hausdorff_distance(bool_gt[i], bool_pred[i], spacing)
            else:
                # NOTE(review): accumulating NaN poisons the running average
                # for this label -- kept for backward compatibility.
                avd = np.nan
            df1['Average Hausdorff Distance'] = avd
            average_HD[i-1] += avd
        if whd:
            # Weighted Hausdorff distance is not implemented yet.
            # wgd = weighted_hausdorff_distance(gt, pred, probability_map)
            # df1['Weighted Hausdorff Distance'] = wgd
            pass

        df = pd.concat([df, df1])
    return df, average_DSC, average_HD
96
+
97
+
98
def make_one_hot(img_np, num_classes):
    """Expand a 3-D labelmap into per-class one-hot volumes.

    Args:
        img_np: 3-D integer labelmap.
        num_classes: number of label channels to produce (including background).

    Returns:
        (int8_onehot, bool_onehot): two arrays of shape
        (num_classes, *img_np.shape); the first is int8 (for Dice sums), the
        second bool (for the surface-distance code).
    """
    stacked_shape = (num_classes,) + img_np.shape
    onehot_int = np.zeros(stacked_shape, dtype=np.int8)
    onehot_bool = np.zeros(stacked_shape, dtype=bool)
    for label in range(num_classes):
        label_mask = img_np == label
        onehot_int[label] = label_mask
        onehot_bool[label] = label_mask

    return onehot_int, onehot_bool
109
+
110
+
111
def average_hausdorff_distance(img_np_gt, img_np_pred, spacing):
    """Symmetric average surface distance between two boolean masks.

    Computes the surface distances in both directions via the
    surface_distance module and returns their arithmetic mean.
    """
    distances = surface_distance.compute_surface_distances(
        img_np_gt, img_np_pred, spacing)
    gt_to_pred, pred_to_gt = surface_distance.compute_average_surface_distance(
        distances)
    return (gt_to_pred + pred_to_gt) / 2
116
+
117
+
118
def checkSegFormat(base, segmentation, type, prefix=None):
    """Normalize a directory of segmentations to .nii.gz files.

    Converts Slicer .seg.nrrd files to labelmap NIfTIs, re-saves .nii as
    .nii.gz and copies existing .nii.gz files into a reformat directory.

    Args:
        base: absolute base directory.
        type: 'gt' (segmentation is an absolute path, output goes to
            gt_reformat_labels) or anything else (segmentation is relative to
            base, output goes to pred_reformat_labels). NOTE(review): shadows
            the builtin `type`; kept for interface compatibility.
        prefix: optional filename prefix applied (via rename) to ground-truth
            files only.

    Returns:
        The directory the reformatted files were written to.
    """
    if type == 'gt':
        save_dir = os.path.join(base, 'gt_reformat_labels')
        path = segmentation
    else:
        save_dir = os.path.join(base, 'pred_reformat_labels')
        path = os.path.join(base, segmentation)
    try:
        os.mkdir(save_dir)
    except FileExistsError:
        # Bug fix: the bare `except:` also hid real failures (permissions,
        # missing parent dir); only an existing directory is expected here.
        print(f'{save_dir} already exists')

    for file in os.listdir(path):
        # Ground-truth files may be renamed to match the prediction naming.
        if type == 'gt' and prefix is not None:
            name = rename(prefix, file)
        else:
            name = file.split('.')[0]

        if file.endswith('seg.nrrd'):
            # Slicer segmentation: collapse segment layers into one labelmap.
            ants_img = ants.image_read(os.path.join(path, file))
            header = nrrd.read_header(os.path.join(path, file))
            out_file = os.path.join(save_dir, name + '.nii.gz')
            nrrd2nifti(ants_img, header, out_file)
        elif file.endswith('nii'):
            image = ants.image_read(os.path.join(path, file))
            image.to_file(os.path.join(save_dir, name + '.nii.gz'))
        elif file.endswith('nii.gz'):
            shutil.copy(os.path.join(path, file),
                        os.path.join(save_dir, name + '.nii.gz'))

    return save_dir
151
+
152
+
153
def nrrd2nifti(img, header, filename):
    """Collapse a Slicer .seg.nrrd segmentation into a labelmap NIfTI file.

    NOTE(review): assumes convert_to_one_hot returns a
    (num_segments, x, y, z) array here -- confirm for older one-hot NRRDs,
    where the data is returned unchanged.
    """
    raw = img.view(single_components=True)
    one_hot = convert_to_one_hot(raw, header)
    # Voxels covered by at least one segment; argmax + 1 yields the label
    # value, multiplied by the foreground mask so background stays 0.
    has_segment = np.max(one_hot, axis=0)
    labelmap = np.multiply(np.argmax(one_hot, axis=0) + 1,
                           has_segment).astype('uint8')
    out_img = ants.from_numpy(
        labelmap, origin=img.origin, spacing=img.spacing, direction=img.direction)
    print('-- Saving NII Segmentations')
    out_img.to_file(filename)
163
+
164
+
165
def convert_to_one_hot(data, header, segment_indices=None):
    """Convert Slicer NRRD segmentation data to a (num_segments, x, y, z) one-hot array.

    Handles three NRRD layouts:
      * newer Slicer files with Segment*_Layer / Segment*_LabelValue header
        keys (segments possibly packed into a single layer),
      * plain binary labelmaps (no layer/label keys, 3-D data),
      * older Slicer files that are already one-hot (returned unchanged).

    Args:
        data: numpy array read from the NRRD file (3-D, or 4-D layer-stacked).
        header: NRRD header mapping.
        segment_indices: optional int or list of ints selecting segments;
            defaults to all segments.

    Returns:
        A one-hot numpy array, the original data (older one-hot layout), or
        None when segment_indices has an unsupported type.
    """
    print('---'*10)
    print("converting to one hot")

    layer_values = get_layer_values(header)
    label_values = get_label_values(header)

    # Newer Slicer NRRD (compressed layers)
    if layer_values and label_values:

        assert len(layer_values) == len(label_values)
        if len(data.shape) == 3:
            x_dim, y_dim, z_dim = data.shape
        elif len(data.shape) == 4:
            # The first axis indexes the layers.
            x_dim, y_dim, z_dim = data.shape[1:]

        num_segments = len(layer_values)
        one_hot = np.zeros((num_segments, x_dim, y_dim, z_dim))

        if segment_indices is None:
            segment_indices = list(range(num_segments))

        elif isinstance(segment_indices, int):
            segment_indices = [segment_indices]

        elif not isinstance(segment_indices, list):
            print("incorrectly specified segment indices")
            return

        # Check if NRRD is composed of one layer 0
        if np.max(layer_values) == 0:
            for i, seg_idx in enumerate(segment_indices):
                label = label_values[seg_idx]
                one_hot[i] = (data == label).astype(np.uint8)

        else:
            for i, seg_idx in enumerate(segment_indices):
                layer = layer_values[seg_idx]
                label = label_values[seg_idx]
                one_hot[i] = (data[layer] == label).astype(np.uint8)

    # Binary labelmap
    elif len(data.shape) == 3:
        x_dim, y_dim, z_dim = data.shape
        # Bug fix: np.max returns a numpy scalar (float for float data); cast
        # to int so it is always valid as an array dimension.
        num_segments = int(np.max(data))
        one_hot = np.zeros((num_segments, x_dim, y_dim, z_dim))

        if segment_indices is None:
            segment_indices = list(range(1, num_segments + 1))

        elif isinstance(segment_indices, int):
            segment_indices = [segment_indices]

        elif not isinstance(segment_indices, list):
            print("incorrectly specified segment indices")
            return

        for i, seg_idx in enumerate(segment_indices):
            one_hot[i] = (data == seg_idx).astype(np.uint8)

    # Older Slicer NRRD (already one-hot)
    else:
        return data

    return one_hot
231
+
232
+
233
def get_layer_values(header):
    """Read the Segment{i}_Layer entries of an NRRD header as a list of ints."""
    segment_count = len([key for key in header.keys() if "Layer" in key])
    return [int(header['Segment{}_Layer'.format(idx)])
            for idx in range(segment_count)]
239
+
240
+
241
def get_label_values(header):
    """Read the Segment{i}_LabelValue entries of an NRRD header as a list of ints."""
    segment_count = len([key for key in header.keys() if "LabelValue" in key])
    return [int(header['Segment{}_LabelValue'.format(idx)])
            for idx in range(segment_count)]
247
+
248
+
249
def main():
    """Evaluate predicted segmentations against ground truth and write a CSV + summary."""
    args = parse_command_line()
    base = args.bp                       # absolute base directory
    gt_path = args.gp                    # ground-truth dir (path handling differs by type below)
    pred_path = args.pp                  # predicted dir, relative to base
    # Default the output directory to the base directory when -sp is omitted.
    if args.sp is None:
        save_path = base
    else:
        save_path = args.sp
    validation_type = args.vt            # list of metric keys: 'dsc' / 'ahd' / 'whd'
    probability_map_path = args.pm
    filename = args.fn                   # suffix of the output CSV name
    reg = args.reg                       # True when inputs are registration predictions
    seg_type = args.tp                   # 'ET' / 'NC' / 'HT' controls filename parsing
    label_list = args.sl                 # alternating label-name / label-value entries
    current_prefix = args.cp
    # Optional probability map (for the not-yet-implemented weighted HD).
    if probability_map_path is not None:
        probability_map = np.loadtxt(os.path.join(base, probability_map_path))
    else:
        probability_map = None
    # Translate the requested metric names into boolean flags.
    dsc = False
    ahd = False
    whd = False
    for i in range(len(validation_type)):
        if validation_type[i] == 'dsc':
            dsc = True
        elif validation_type[i] == 'ahd':
            ahd = True
        elif validation_type[i] == 'whd':
            whd = True
        else:
            print('wrong validation type, please choose correct one !!!')
            return

    filepath = os.path.join(base, save_path, 'output_' + filename + '.csv')
    save_dir = os.path.join(base, save_path)
    # Normalize both input directories to .nii.gz labelmaps.
    gt_output_path = checkSegFormat(base, gt_path, 'gt', current_prefix)
    pred_output_path = checkSegFormat(base, pred_path, 'pred', current_prefix)
    try:
        os.mkdir(save_dir)
    except:
        print(f'{save_dir} already exists')

    try:
        os.mknod(filepath)
    except:
        print(f'{filepath} already exists')

    DSC = pd.DataFrame()
    # Infer the number of classes from one ground-truth volume to size the
    # per-label accumulators (background excluded).
    file = glob.glob(os.path.join(base, gt_output_path) + '/*nii.gz')[0]
    seg_file = ants.image_read(file)
    num_class = np.unique(seg_file.numpy().ravel()).shape[0]
    average_DSC = np.zeros((num_class-1))
    average_HD = np.zeros((num_class-1))
    k = 0  # number of prediction files processed
    for i in glob.glob(os.path.join(base, pred_output_path) + '/*nii.gz'):
        k += 1
        pred_img = ants.image_read(i)
        pred_spacing = list(pred_img.spacing)
        # Registration outputs embed the case id at a seg-type-specific
        # position in the underscore-separated filename; extract it so the
        # matching ground-truth file can be located.
        if reg and seg_type == 'ET':
            file_name = os.path.basename(i).split('.')[0].split('_')[4] + '_' + os.path.basename(
                i).split('.')[0].split('_')[5] + '_' + os.path.basename(i).split('.')[0].split('_')[6]
            file_name1 = os.path.basename(i).split('.')[0]
        elif reg and seg_type == 'NC':
            file_name = os.path.basename(i).split(
                '.')[0].split('_')[3] + '_' + os.path.basename(i).split('.')[0].split('_')[4]
            file_name1 = os.path.basename(i).split('.')[0]
        elif reg and seg_type == 'HT':
            file_name = os.path.basename(i).split('.')[0].split('_')[2]
            file_name1 = os.path.basename(i).split('.')[0]
        else:
            file_name = os.path.basename(i).split('.')[0]
            file_name1 = os.path.basename(i).split('.')[0]
        gt_seg = os.path.join(base, gt_output_path, file_name + '.nii.gz')
        gt_img = ants.image_read(gt_seg)
        gt_spacing = list(gt_img.spacing)

        # Surface distances are only meaningful on identical voxel grids.
        if gt_spacing != pred_spacing:
            print(
                "Spacing of prediction and ground_truth is not matched, please check again !!!")
            return

        ref = pred_img
        data_ref = ref.numpy()

        # NOTE(review): here `pred`/`data_pred` hold the GROUND TRUTH image --
        # the naming is swapped relative to its content; kept as-is.
        pred = gt_img
        data_pred = pred.numpy()

        num_class = len(np.unique(data_pred))
        ds, aver_DSC, aver_HD = dice_coefficient_and_hausdorff_distance(
            file_name1, data_ref, data_pred, num_class, pred_spacing, probability_map, dsc, ahd, whd, average_DSC, average_HD)
        DSC = pd.concat([DSC, ds])
        average_DSC = aver_DSC
        average_HD = aver_HD

    # Per-label averages over all processed cases.
    avg_DSC = average_DSC / k
    avg_HD = average_HD / k
    print(avg_DSC)
    # Human-readable summary table; label names come from every second entry
    # of the -sl list (name/value pairs).
    with open(os.path.join(base, save_path, "metric.txt"), 'w') as f:
        f.write("Label Value Label Name Average Dice Score Average Mean HD\n")
        for i in range(len(avg_DSC)):
            f.write(f'{str(i+1):^12}{str(label_list[2*i+1]):^12}{str(avg_DSC[i]):^20}{str(avg_HD[i]):^18}\n')
    DSC.to_csv(filepath)
352
+
353
+
354
if __name__ == '__main__':
    # Script entry point.
    main()
metrics/surface_distance.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2018 Google Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS-IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language
13
+ from __future__ import absolute_import
14
+ from __future__ import division
15
+ from __future__ import print_function
16
+
17
+ import lookup_tables # pylint: disable=relative-beyond-top-level
18
+ import numpy as np
19
+ from scipy import ndimage
20
+
21
+ """
22
+
23
+ surface_distance.py
24
+
25
+ all of the surface_distance functions are borrowed from DeepMind surface_distance repository
26
+
27
+ """
28
+ def _assert_is_numpy_array(name, array):
29
+ """Raises an exception if `array` is not a numpy array."""
30
+ if not isinstance(array, np.ndarray):
31
+ raise ValueError("The argument {!r} should be a numpy array, not a "
32
+ "{}".format(name, type(array)))
33
+
34
+
35
+ def _check_nd_numpy_array(name, array, num_dims):
36
+ """Raises an exception if `array` is not a `num_dims`-D numpy array."""
37
+ if len(array.shape) != num_dims:
38
+ raise ValueError("The argument {!r} should be a {}D array, not of "
39
+ "shape {}".format(name, num_dims, array.shape))
40
+
41
+
42
def _check_2d_numpy_array(name, array):
    # Convenience wrapper: raise ValueError unless `array` is 2-D.
    _check_nd_numpy_array(name, array, num_dims=2)
44
+
45
+
46
def _check_3d_numpy_array(name, array):
    # Convenience wrapper: raise ValueError unless `array` is 3-D.
    _check_nd_numpy_array(name, array, num_dims=3)
48
+
49
+
50
def _assert_is_bool_numpy_array(name, array):
    """Raise ValueError unless `array` is a numpy array of dtype bool."""
    _assert_is_numpy_array(name, array)
    # Bug fix: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
    # comparing against the builtin bool matches np.bool_ dtypes.
    if array.dtype != bool:
        raise ValueError("The argument {!r} should be a numpy array of type bool, "
                         "not {}".format(name, array.dtype))
55
+
56
+
57
+ def _compute_bounding_box(mask):
58
+ """Computes the bounding box of the masks.
59
+ This function generalizes to arbitrary number of dimensions great or equal
60
+ to 1.
61
+ Args:
62
+ mask: The 2D or 3D numpy mask, where '0' means background and non-zero means
63
+ foreground.
64
+ Returns:
65
+ A tuple:
66
+ - The coordinates of the first point of the bounding box (smallest on all
67
+ axes), or `None` if the mask contains only zeros.
68
+ - The coordinates of the second point of the bounding box (greatest on all
69
+ axes), or `None` if the mask contains only zeros.
70
+ """
71
+ num_dims = len(mask.shape)
72
+ bbox_min = np.zeros(num_dims, np.int64)
73
+ bbox_max = np.zeros(num_dims, np.int64)
74
+
75
+ # max projection to the x0-axis
76
+ proj_0 = np.amax(mask, axis=tuple(range(num_dims))[1:])
77
+ idx_nonzero_0 = np.nonzero(proj_0)[0]
78
+ if len(idx_nonzero_0) == 0: # pylint: disable=g-explicit-length-test
79
+ return None, None
80
+
81
+ bbox_min[0] = np.min(idx_nonzero_0)
82
+ bbox_max[0] = np.max(idx_nonzero_0)
83
+
84
+ # max projection to the i-th-axis for i in {1, ..., num_dims - 1}
85
+ for axis in range(1, num_dims):
86
+ max_over_axes = list(range(num_dims)) # Python 3 compatible
87
+ max_over_axes.pop(axis) # Remove the i-th dimension from the max
88
+ max_over_axes = tuple(max_over_axes) # numpy expects a tuple of ints
89
+ proj = np.amax(mask, axis=max_over_axes)
90
+ idx_nonzero = np.nonzero(proj)[0]
91
+ bbox_min[axis] = np.min(idx_nonzero)
92
+ bbox_max[axis] = np.max(idx_nonzero)
93
+
94
+ return bbox_min, bbox_max
95
+
96
+
97
+ def _crop_to_bounding_box(mask, bbox_min, bbox_max):
98
+ """Crops a 2D or 3D mask to the bounding box specified by `bbox_{min,max}`."""
99
+ # we need to zeropad the cropped region with 1 voxel at the lower,
100
+ # the right (and the back on 3D) sides. This is required to obtain the
101
+ # "full" convolution result with the 2x2 (or 2x2x2 in 3D) kernel.
102
+ # TODO: This is correct only if the object is interior to the
103
+ # bounding box.
104
+ cropmask = np.zeros((bbox_max - bbox_min) + 2, np.uint8)
105
+
106
+ num_dims = len(mask.shape)
107
+ # pyformat: disable
108
+ if num_dims == 2:
109
+ cropmask[0:-1, 0:-1] = mask[bbox_min[0]:bbox_max[0] + 1,
110
+ bbox_min[1]:bbox_max[1] + 1]
111
+ elif num_dims == 3:
112
+ cropmask[0:-1, 0:-1, 0:-1] = mask[bbox_min[0]:bbox_max[0] + 1,
113
+ bbox_min[1]:bbox_max[1] + 1,
114
+ bbox_min[2]:bbox_max[2] + 1]
115
+ # pyformat: enable
116
+ else:
117
+ assert False
118
+
119
+ return cropmask
120
+
121
+
122
+ def _sort_distances_surfels(distances, surfel_areas):
123
+ """Sorts the two list with respect to the tuple of (distance, surfel_area).
124
+ Args:
125
+ distances: The distances from A to B (e.g. `distances_gt_to_pred`).
126
+ surfel_areas: The surfel areas for A (e.g. `surfel_areas_gt`).
127
+ Returns:
128
+ A tuple of the sorted (distances, surfel_areas).
129
+ """
130
+ sorted_surfels = np.array(sorted(zip(distances, surfel_areas)))
131
+ return sorted_surfels[:, 0], sorted_surfels[:, 1]
132
+
133
+
134
def compute_surface_distances(mask_gt,
                              mask_pred,
                              spacing_mm):
    """Computes closest distances from all surface points to the other surface.

    Works on 2D or 3D masks; `spacing_mm` must have one element per mask
    dimension. Finds all contour elements (resp. surface elements, "surfels",
    in 3D) in the ground truth mask `mask_gt` and the predicted mask
    `mask_pred`, computes their length in mm (resp. area in mm^2) and the
    distance to the closest point on the other contour (resp. surface). It
    returns two sorted lists of distances together with the corresponding
    contour lengths (resp. surfel areas). If one of the masks is empty, the
    corresponding lists are empty and all distances in the other list are
    `inf`.

    Args:
      mask_gt: 2-dim (resp. 3-dim) bool Numpy array. The ground truth mask.
      mask_pred: 2-dim (resp. 3-dim) bool Numpy array. The predicted mask.
      spacing_mm: 2-element (resp. 3-element) list-like structure. Voxel
        spacing in x0 and x1 (resp. x0, x1 and x2) directions.

    Returns:
      A dict with:
      "distances_gt_to_pred": 1-dim numpy array of float. Distances in mm from
        all ground truth surface elements to the predicted surface, sorted
        from smallest to largest.
      "distances_pred_to_gt": 1-dim numpy array of float. Distances in mm from
        all predicted surface elements to the ground truth surface, sorted
        from smallest to largest.
      "surfel_areas_gt": 1-dim numpy array of float. The length of the ground
        truth contours in mm (resp. surfel areas in mm^2), ordered like
        distances_gt_to_pred.
      "surfel_areas_pred": 1-dim numpy array of float. Same for the predicted
        contours, ordered like distances_pred_to_gt.

    Raises:
      ValueError: If the masks and the `spacing_mm` arguments are of
        incompatible shape or type, or if the masks are not 2D or 3D.
    """
    # The terms used in this function are for the 3D case. In particular,
    # surface in 2D stands for contours in 3D; surface elements correspond to
    # line elements in 2D.

    _assert_is_bool_numpy_array("mask_gt", mask_gt)
    _assert_is_bool_numpy_array("mask_pred", mask_pred)

    if not len(mask_gt.shape) == len(mask_pred.shape) == len(spacing_mm):
        raise ValueError("The arguments must be of compatible shape. Got mask_gt "
                         "with {} dimensions ({}) and mask_pred with {} dimensions "
                         "({}), while the spacing_mm was {} elements.".format(
                             len(mask_gt.shape),
                             mask_gt.shape, len(
                                 mask_pred.shape), mask_pred.shape,
                             len(spacing_mm)))

    num_dims = len(spacing_mm)
    if num_dims == 2:
        _check_2d_numpy_array("mask_gt", mask_gt)
        _check_2d_numpy_array("mask_pred", mask_pred)

        # compute the area for all 16 possible surface elements
        # (given a 2x2 neighbourhood) according to the spacing_mm
        neighbour_code_to_surface_area = (
            lookup_tables.create_table_neighbour_code_to_contour_length(spacing_mm))
        kernel = lookup_tables.ENCODE_NEIGHBOURHOOD_2D_KERNEL
        full_true_neighbours = 0b1111
    elif num_dims == 3:
        _check_3d_numpy_array("mask_gt", mask_gt)
        _check_3d_numpy_array("mask_pred", mask_pred)

        # compute the area for all 256 possible surface elements
        # (given a 2x2x2 neighbourhood) according to the spacing_mm
        neighbour_code_to_surface_area = (
            lookup_tables.create_table_neighbour_code_to_surface_area(spacing_mm))
        kernel = lookup_tables.ENCODE_NEIGHBOURHOOD_3D_KERNEL
        full_true_neighbours = 0b11111111
    else:
        raise ValueError("Only 2D and 3D masks are supported, not "
                         "{}D.".format(num_dims))

    # compute the bounding box of the masks to trim the volume to the smallest
    # possible processing subvolume
    bbox_min, bbox_max = _compute_bounding_box(mask_gt | mask_pred)
    # Both the min/max bbox are None at the same time, so we only check one.
    if bbox_min is None:
        return {
            "distances_gt_to_pred": np.array([]),
            "distances_pred_to_gt": np.array([]),
            "surfel_areas_gt": np.array([]),
            "surfel_areas_pred": np.array([]),
        }

    # crop the processing subvolume.
    cropmask_gt = _crop_to_bounding_box(mask_gt, bbox_min, bbox_max)
    cropmask_pred = _crop_to_bounding_box(mask_pred, bbox_min, bbox_max)

    # compute the neighbour code (local binary pattern) for each voxel; the
    # resulting arrays are spatially shifted by minus half a voxel per axis,
    # i.e. the points are located at the corners of the original voxels.
    # Bug fix: scipy.ndimage.filters was deprecated and removed (SciPy 1.10+);
    # the functions live directly in scipy.ndimage.
    neighbour_code_map_gt = ndimage.correlate(
        cropmask_gt.astype(np.uint8), kernel, mode="constant", cval=0)
    neighbour_code_map_pred = ndimage.correlate(
        cropmask_pred.astype(np.uint8), kernel, mode="constant", cval=0)

    # create masks with the surface voxels
    borders_gt = ((neighbour_code_map_gt != 0) &
                  (neighbour_code_map_gt != full_true_neighbours))
    borders_pred = ((neighbour_code_map_pred != 0) &
                    (neighbour_code_map_pred != full_true_neighbours))

    # compute the distance transform (closest distance of each voxel to the
    # surface voxels)
    if borders_gt.any():
        # Bug fix: scipy.ndimage.morphology namespace deprecated, see above.
        distmap_gt = ndimage.distance_transform_edt(
            ~borders_gt, sampling=spacing_mm)
    else:
        # Bug fix: np.Inf was removed in NumPy 2.0; np.inf is canonical.
        distmap_gt = np.inf * np.ones(borders_gt.shape)

    if borders_pred.any():
        distmap_pred = ndimage.distance_transform_edt(
            ~borders_pred, sampling=spacing_mm)
    else:
        distmap_pred = np.inf * np.ones(borders_pred.shape)

    # compute the area of each surface element
    surface_area_map_gt = neighbour_code_to_surface_area[neighbour_code_map_gt]
    surface_area_map_pred = neighbour_code_to_surface_area[
        neighbour_code_map_pred]

    # create a list of all surface elements with distance and area
    distances_gt_to_pred = distmap_pred[borders_gt]
    distances_pred_to_gt = distmap_gt[borders_pred]
    surfel_areas_gt = surface_area_map_gt[borders_gt]
    surfel_areas_pred = surface_area_map_pred[borders_pred]

    # sort them by distance
    if distances_gt_to_pred.shape != (0,):
        distances_gt_to_pred, surfel_areas_gt = _sort_distances_surfels(
            distances_gt_to_pred, surfel_areas_gt)

    if distances_pred_to_gt.shape != (0,):
        distances_pred_to_gt, surfel_areas_pred = _sort_distances_surfels(
            distances_pred_to_gt, surfel_areas_pred)

    return {
        "distances_gt_to_pred": distances_gt_to_pred,
        "distances_pred_to_gt": distances_pred_to_gt,
        "surfel_areas_gt": surfel_areas_gt,
        "surfel_areas_pred": surfel_areas_pred,
    }
285
+
286
+
287
def compute_average_surface_distance(surface_distances):
    """Return the surfel-area-weighted average surface distance per direction.

    Call compute_surface_distances(...) first to build the input dict.

    Args:
      surface_distances: dict with "distances_gt_to_pred",
        "distances_pred_to_gt", "surfel_areas_gt" and "surfel_areas_pred".

    Returns:
      A tuple of two floats: the average distance (in mm) from the ground
      truth surface to the predicted surface, and the average distance from
      the predicted surface to the ground truth surface.
    """
    dist_gt = surface_distances["distances_gt_to_pred"]
    dist_pred = surface_distances["distances_pred_to_gt"]
    areas_gt = surface_distances["surfel_areas_gt"]
    areas_pred = surface_distances["surfel_areas_pred"]
    avg_gt_to_pred = np.sum(dist_gt * areas_gt) / np.sum(areas_gt)
    avg_pred_to_gt = np.sum(dist_pred * areas_pred) / np.sum(areas_pred)
    return (avg_gt_to_pred, avg_pred_to_gt)
313
+
314
+
315
def compute_robust_hausdorff(surface_distances, percent):
    """Computes the robust Hausdorff distance.

    "Robust" because it uses the `percent` percentile of the distances instead
    of the maximum distance. The percentage is computed by correctly taking
    the area of each surface element into account.

    Args:
      surface_distances: dict with "distances_gt_to_pred",
        "distances_pred_to_gt", "surfel_areas_gt", "surfel_areas_pred"
        created by compute_surface_distances().
      percent: a float value between 0 and 100.

    Returns:
      a float value. The robust Hausdorff distance in mm (inf when both
      surfaces are empty).
    """
    distances_gt_to_pred = surface_distances["distances_gt_to_pred"]
    distances_pred_to_gt = surface_distances["distances_pred_to_gt"]
    surfel_areas_gt = surface_distances["surfel_areas_gt"]
    surfel_areas_pred = surface_distances["surfel_areas_pred"]
    if len(distances_gt_to_pred) > 0:  # pylint: disable=g-explicit-length-test
        # Percentile on the cumulative (normalized) surfel-area distribution.
        surfel_areas_cum_gt = np.cumsum(
            surfel_areas_gt) / np.sum(surfel_areas_gt)
        idx = np.searchsorted(surfel_areas_cum_gt, percent/100.0)
        perc_distance_gt_to_pred = distances_gt_to_pred[
            min(idx, len(distances_gt_to_pred)-1)]
    else:
        # Bug fix: np.Inf was removed in NumPy 2.0; np.inf is the canonical
        # spelling.
        perc_distance_gt_to_pred = np.inf

    if len(distances_pred_to_gt) > 0:  # pylint: disable=g-explicit-length-test
        surfel_areas_cum_pred = (np.cumsum(surfel_areas_pred) /
                                 np.sum(surfel_areas_pred))
        idx = np.searchsorted(surfel_areas_cum_pred, percent/100.0)
        perc_distance_pred_to_gt = distances_pred_to_gt[
            min(idx, len(distances_pred_to_gt)-1)]
    else:
        perc_distance_pred_to_gt = np.inf

    # Symmetric: worst of both directions.
    return max(perc_distance_gt_to_pred, perc_distance_pred_to_gt)
352
+
353
+
354
def compute_surface_overlap_at_tolerance(surface_distances, tolerance_mm):
    """Fraction of each surface lying within `tolerance_mm` of the other.

    The overlapping fraction is weighted by the area of each surface element.

    Args:
      surface_distances: dict with "distances_gt_to_pred",
        "distances_pred_to_gt", "surfel_areas_gt", "surfel_areas_pred"
        created by compute_surface_distances().
      tolerance_mm: a float value. The tolerance in mm.

    Returns:
      A tuple of two floats in [0.0, 1.0]: overlap of the ground truth surface
      with the predicted surface, and vice versa.
    """
    dist_gt = surface_distances["distances_gt_to_pred"]
    dist_pred = surface_distances["distances_pred_to_gt"]
    areas_gt = surface_distances["surfel_areas_gt"]
    areas_pred = surface_distances["surfel_areas_pred"]
    within_gt = dist_gt <= tolerance_mm
    within_pred = dist_pred <= tolerance_mm
    rel_overlap_gt = np.sum(areas_gt[within_gt]) / np.sum(areas_gt)
    rel_overlap_pred = np.sum(areas_pred[within_pred]) / np.sum(areas_pred)
    return (rel_overlap_gt, rel_overlap_pred)
380
+
381
+
382
def compute_surface_dice_at_tolerance(surface_distances, tolerance_mm):
    """Compute the _surface_ DICE coefficient at a specified tolerance.

    Not to be confused with the standard _volumetric_ DICE coefficient: the
    surface DICE measures the overlap of two surfaces. A surface element
    counts as overlapping when its closest distance to the other surface is at
    most `tolerance_mm`. Ranges from 0.0 (no overlap) to 1.0 (perfect
    overlap).

    Args:
      surface_distances: dict with "distances_gt_to_pred",
        "distances_pred_to_gt", "surfel_areas_gt", "surfel_areas_pred"
        created by compute_surface_distances().
      tolerance_mm: a float value. The tolerance in mm.

    Returns:
      A float value. The surface DICE coefficient in [0.0, 1.0].
    """
    dist_gt = surface_distances["distances_gt_to_pred"]
    dist_pred = surface_distances["distances_pred_to_gt"]
    areas_gt = surface_distances["surfel_areas_gt"]
    areas_pred = surface_distances["surfel_areas_pred"]
    overlap_gt = np.sum(areas_gt[dist_gt <= tolerance_mm])
    overlap_pred = np.sum(areas_pred[dist_pred <= tolerance_mm])
    total_area = np.sum(areas_gt) + np.sum(areas_pred)
    return (overlap_gt + overlap_pred) / total_area
408
+
409
+
410
def compute_dice_coefficient(mask_gt, mask_pred):
    """Computes the Soerensen-Dice coefficient between two binary masks.

    Args:
      mask_gt: 3-dim Numpy array of type bool. The ground truth mask.
      mask_pred: 3-dim Numpy array of type bool. The predicted mask.

    Returns:
      The Dice coefficient as float. If both masks are empty, the overlap is
      undefined and NaN is returned.
    """
    volume_sum = mask_gt.sum() + mask_pred.sum()
    if volume_sum == 0:
        # Bug fix: np.NaN was removed in NumPy 2.0; np.nan is the canonical
        # spelling.
        return np.nan
    volume_intersect = (mask_gt & mask_pred).sum()
    return 2*volume_intersect / volume_sum
nnunet/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from __future__ import absolute_import

# Citation banner printed once when the package is imported, as requested by
# the nnU-Net authors.
print("\n\nPlease cite the following paper when using nnUNet:\n\nIsensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "
      "\"nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation.\" "
      "Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z\n\n")
print("If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet\n")

# Re-export the package's submodules.
from . import *
nnunet/configuration.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
import os

# Default number of worker processes used throughout nnU-Net; can be
# overridden with the nnUNet_def_n_proc environment variable.
default_num_threads = 8 if 'nnUNet_def_n_proc' not in os.environ else int(os.environ['nnUNet_def_n_proc'])
RESAMPLING_SEPARATE_Z_ANISO_THRESHOLD = 3  # determines what threshold to use for resampling the low resolution axis
# separately (with NN)
nnunet/dataset_conversion/Task017_BeyondCranialVaultAbdominalOrganSegmentation.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+
21
+
22
+ if __name__ == "__main__":
23
+ base = "/media/yunlu/10TB/research/other_data/Multi-Atlas Labeling Beyond the Cranial Vault/RawData/"
24
+
25
+ task_id = 17
26
+ task_name = "AbdominalOrganSegmentation"
27
+ prefix = 'ABD'
28
+
29
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
30
+
31
+ out_base = join(nnUNet_raw_data, foldername)
32
+ imagestr = join(out_base, "imagesTr")
33
+ imagests = join(out_base, "imagesTs")
34
+ labelstr = join(out_base, "labelsTr")
35
+ maybe_mkdir_p(imagestr)
36
+ maybe_mkdir_p(imagests)
37
+ maybe_mkdir_p(labelstr)
38
+
39
+ train_folder = join(base, "Training/img")
40
+ label_folder = join(base, "Training/label")
41
+ test_folder = join(base, "Test/img")
42
+ train_patient_names = []
43
+ test_patient_names = []
44
+ train_patients = subfiles(train_folder, join=False, suffix = 'nii.gz')
45
+ for p in train_patients:
46
+ serial_number = int(p[3:7])
47
+ train_patient_name = f'{prefix}_{serial_number:03d}.nii.gz'
48
+ label_file = join(label_folder, f'label{p[3:]}')
49
+ image_file = join(train_folder, p)
50
+ shutil.copy(image_file, join(imagestr, f'{train_patient_name[:7]}_0000.nii.gz'))
51
+ shutil.copy(label_file, join(labelstr, train_patient_name))
52
+ train_patient_names.append(train_patient_name)
53
+
54
+ test_patients = subfiles(test_folder, join=False, suffix=".nii.gz")
55
+ for p in test_patients:
56
+ p = p[:-7]
57
+ image_file = join(test_folder, p + ".nii.gz")
58
+ serial_number = int(p[3:7])
59
+ test_patient_name = f'{prefix}_{serial_number:03d}.nii.gz'
60
+ shutil.copy(image_file, join(imagests, f'{test_patient_name[:7]}_0000.nii.gz'))
61
+ test_patient_names.append(test_patient_name)
62
+
63
+ json_dict = OrderedDict()
64
+ json_dict['name'] = "AbdominalOrganSegmentation"
65
+ json_dict['description'] = "Multi-Atlas Labeling Beyond the Cranial Vault Abdominal Organ Segmentation"
66
+ json_dict['tensorImageSize'] = "3D"
67
+ json_dict['reference'] = "https://www.synapse.org/#!Synapse:syn3193805/wiki/217789"
68
+ json_dict['licence'] = "see challenge website"
69
+ json_dict['release'] = "0.0"
70
+ json_dict['modality'] = {
71
+ "0": "CT",
72
+ }
73
+ json_dict['labels'] = OrderedDict({
74
+ "00": "background",
75
+ "01": "spleen",
76
+ "02": "right kidney",
77
+ "03": "left kidney",
78
+ "04": "gallbladder",
79
+ "05": "esophagus",
80
+ "06": "liver",
81
+ "07": "stomach",
82
+ "08": "aorta",
83
+ "09": "inferior vena cava",
84
+ "10": "portal vein and splenic vein",
85
+ "11": "pancreas",
86
+ "12": "right adrenal gland",
87
+ "13": "left adrenal gland"}
88
+ )
89
+ json_dict['numTraining'] = len(train_patient_names)
90
+ json_dict['numTest'] = len(test_patient_names)
91
+ json_dict['training'] = [{'image': "./imagesTr/%s" % train_patient_name, "label": "./labelsTr/%s" % train_patient_name} for i, train_patient_name in enumerate(train_patient_names)]
92
+ json_dict['test'] = ["./imagesTs/%s" % test_patient_name for test_patient_name in test_patient_names]
93
+
94
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task024_Promise2012.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from collections import OrderedDict
15
+ import SimpleITK as sitk
16
+ from batchgenerators.utilities.file_and_folder_operations import *
17
+
18
+
19
def export_for_submission(source_dir, target_dir):
    """
    promise wants mhd :-/
    Re-saves every .nii.gz prediction in source_dir as .mhd in target_dir.
    :param source_dir:
    :param target_dir:
    :return:
    """
    nifti_names = subfiles(source_dir, suffix=".nii.gz", join=False)
    mhd_targets = [join(target_dir, name[:-7] + ".mhd") for name in nifti_names]
    maybe_mkdir_p(target_dir)
    for nifti_name, mhd_target in zip(nifti_names, mhd_targets):
        image = sitk.ReadImage(join(source_dir, nifti_name))
        sitk.WriteImage(image, mhd_target)
+
33
+
34
+ if __name__ == "__main__":
35
+ folder = "/media/fabian/My Book/datasets/promise2012"
36
+ out_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task024_Promise"
37
+
38
+ maybe_mkdir_p(join(out_folder, "imagesTr"))
39
+ maybe_mkdir_p(join(out_folder, "imagesTs"))
40
+ maybe_mkdir_p(join(out_folder, "labelsTr"))
41
+ # train
42
+ current_dir = join(folder, "train")
43
+ segmentations = subfiles(current_dir, suffix="segmentation.mhd")
44
+ raw_data = [i for i in subfiles(current_dir, suffix="mhd") if not i.endswith("segmentation.mhd")]
45
+ for i in raw_data:
46
+ out_fname = join(out_folder, "imagesTr", i.split("/")[-1][:-4] + "_0000.nii.gz")
47
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
48
+ for i in segmentations:
49
+ out_fname = join(out_folder, "labelsTr", i.split("/")[-1][:-17] + ".nii.gz")
50
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
51
+
52
+ # test
53
+ current_dir = join(folder, "test")
54
+ test_data = subfiles(current_dir, suffix="mhd")
55
+ for i in test_data:
56
+ out_fname = join(out_folder, "imagesTs", i.split("/")[-1][:-4] + "_0000.nii.gz")
57
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
58
+
59
+
60
+ json_dict = OrderedDict()
61
+ json_dict['name'] = "PROMISE12"
62
+ json_dict['description'] = "prostate"
63
+ json_dict['tensorImageSize'] = "4D"
64
+ json_dict['reference'] = "see challenge website"
65
+ json_dict['licence'] = "see challenge website"
66
+ json_dict['release'] = "0.0"
67
+ json_dict['modality'] = {
68
+ "0": "MRI",
69
+ }
70
+ json_dict['labels'] = {
71
+ "0": "background",
72
+ "1": "prostate"
73
+ }
74
+ json_dict['numTraining'] = len(raw_data)
75
+ json_dict['numTest'] = len(test_data)
76
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1][:-4], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1][:-4]} for i in
77
+ raw_data]
78
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1][:-4] for i in test_data]
79
+
80
+ save_json(json_dict, os.path.join(out_folder, "dataset.json"))
81
+
nnunet/dataset_conversion/Task027_AutomaticCardiacDetectionChallenge.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import OrderedDict
16
+ from batchgenerators.utilities.file_and_folder_operations import *
17
+ import shutil
18
+ import numpy as np
19
+ from sklearn.model_selection import KFold
20
+
21
+
22
def convert_to_submission(source_dir, target_dir):
    """Copy ACDC predictions into the submission layout (<patient>_ED / <patient>_ES)."""
    nifti_names = subfiles(source_dir, join=False, suffix=".nii.gz")
    # the first 10 characters of a file name identify the patient
    patients = np.unique([name[:10] for name in nifti_names])
    maybe_mkdir_p(target_dir)
    for patient in patients:
        patient_files = subfiles(source_dir, prefix=patient, suffix=".nii.gz", join=False)
        assert len(patient_files)
        patient_files.sort()
        # sorted order: ED frame comes first, ES frame second
        shutil.copy(join(source_dir, patient_files[0]), join(target_dir, patient + "_ED.nii.gz"))
        shutil.copy(join(source_dir, patient_files[1]), join(target_dir, patient + "_ES.nii.gz"))
33
+
34
+
35
+ if __name__ == "__main__":
36
+ folder = "/media/fabian/My Book/datasets/ACDC/training"
37
+ folder_test = "/media/fabian/My Book/datasets/ACDC/testing/testing"
38
+ out_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task027_ACDC"
39
+
40
+ maybe_mkdir_p(join(out_folder, "imagesTr"))
41
+ maybe_mkdir_p(join(out_folder, "imagesTs"))
42
+ maybe_mkdir_p(join(out_folder, "labelsTr"))
43
+
44
+ # train
45
+ all_train_files = []
46
+ patient_dirs_train = subfolders(folder, prefix="patient")
47
+ for p in patient_dirs_train:
48
+ current_dir = p
49
+ data_files_train = [i for i in subfiles(current_dir, suffix=".nii.gz") if i.find("_gt") == -1 and i.find("_4d") == -1]
50
+ corresponding_seg_files = [i[:-7] + "_gt.nii.gz" for i in data_files_train]
51
+ for d, s in zip(data_files_train, corresponding_seg_files):
52
+ patient_identifier = d.split("/")[-1][:-7]
53
+ all_train_files.append(patient_identifier + "_0000.nii.gz")
54
+ shutil.copy(d, join(out_folder, "imagesTr", patient_identifier + "_0000.nii.gz"))
55
+ shutil.copy(s, join(out_folder, "labelsTr", patient_identifier + ".nii.gz"))
56
+
57
+ # test
58
+ all_test_files = []
59
+ patient_dirs_test = subfolders(folder_test, prefix="patient")
60
+ for p in patient_dirs_test:
61
+ current_dir = p
62
+ data_files_test = [i for i in subfiles(current_dir, suffix=".nii.gz") if i.find("_gt") == -1 and i.find("_4d") == -1]
63
+ for d in data_files_test:
64
+ patient_identifier = d.split("/")[-1][:-7]
65
+ all_test_files.append(patient_identifier + "_0000.nii.gz")
66
+ shutil.copy(d, join(out_folder, "imagesTs", patient_identifier + "_0000.nii.gz"))
67
+
68
+
69
+ json_dict = OrderedDict()
70
+ json_dict['name'] = "ACDC"
71
+ json_dict['description'] = "cardias cine MRI segmentation"
72
+ json_dict['tensorImageSize'] = "4D"
73
+ json_dict['reference'] = "see ACDC challenge"
74
+ json_dict['licence'] = "see ACDC challenge"
75
+ json_dict['release'] = "0.0"
76
+ json_dict['modality'] = {
77
+ "0": "MRI",
78
+ }
79
+ json_dict['labels'] = {
80
+ "0": "background",
81
+ "1": "RV",
82
+ "2": "MLV",
83
+ "3": "LVC"
84
+ }
85
+ json_dict['numTraining'] = len(all_train_files)
86
+ json_dict['numTest'] = len(all_test_files)
87
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1][:-12], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1][:-12]} for i in
88
+ all_train_files]
89
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1][:-12] for i in all_test_files]
90
+
91
+ save_json(json_dict, os.path.join(out_folder, "dataset.json"))
92
+
93
+ # create a dummy split (patients need to be separated)
94
+ splits = []
95
+ patients = np.unique([i[:10] for i in all_train_files])
96
+ patientids = [i[:-12] for i in all_train_files]
97
+
98
+ kf = KFold(5, True, 12345)
99
+ for tr, val in kf.split(patients):
100
+ splits.append(OrderedDict())
101
+ tr_patients = patients[tr]
102
+ splits[-1]['train'] = [i[:-12] for i in all_train_files if i[:10] in tr_patients]
103
+ val_patients = patients[val]
104
+ splits[-1]['val'] = [i[:-12] for i in all_train_files if i[:10] in val_patients]
105
+
106
+ save_pickle(splits, "/media/fabian/nnunet/Task027_ACDC/splits_final.pkl")
nnunet/dataset_conversion/Task029_LiverTumorSegmentationChallenge.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import OrderedDict
16
+ import SimpleITK as sitk
17
+ from batchgenerators.utilities.file_and_folder_operations import *
18
+ from multiprocessing import Pool
19
+ import numpy as np
20
+ from nnunet.configuration import default_num_threads
21
+ from scipy.ndimage import label
22
+
23
+
24
def export_segmentations(indir, outdir):
    """Re-save LiTS predictions as uncompressed .nii with the challenge naming scheme."""
    for fname in subfiles(indir, suffix='nii.gz', join=False):
        # case id is the last "_"-separated token, minus the ".nii.gz" suffix
        case_id = str(fname.split("_")[-1][:-7])
        target = join(outdir, "test-segmentation-%s.nii" % case_id)
        image = sitk.ReadImage(join(indir, fname))
        sitk.WriteImage(image, target)
31
+
32
+
33
def export_segmentations_postprocess(indir, outdir):
    """Keep only the largest connected foreground component of each prediction
    and save it as uncompressed .nii with the challenge naming scheme."""
    maybe_mkdir_p(outdir)
    for fname in subfiles(indir, suffix='nii.gz', join=False):
        print("\n", fname)
        case_id = str(fname.split("_")[-1][:-7])
        target = join(outdir, "test-segmentation-%s.nii" % case_id)
        image = sitk.ReadImage(join(indir, fname))
        seg_arr = sitk.GetArrayFromImage(image)
        components, n_components = label((seg_arr > 0).astype(int))
        component_sizes = [(components == idx).sum() for idx in range(1, n_components + 1)]
        largest = np.argmax(component_sizes) + 1
        print(component_sizes)
        # zero out everything outside the largest component
        seg_arr[components != largest] = 0
        cleaned = sitk.GetImageFromArray(seg_arr)
        cleaned.CopyInformation(image)
        sitk.WriteImage(cleaned, target)
52
+
53
+
54
+ if __name__ == "__main__":
55
+ train_dir = "/media/fabian/DeepLearningData/tmp/LITS-Challenge-Train-Data"
56
+ test_dir = "/media/fabian/My Book/datasets/LiTS/test_data"
57
+
58
+
59
+ output_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task029_LITS"
60
+ img_dir = join(output_folder, "imagesTr")
61
+ lab_dir = join(output_folder, "labelsTr")
62
+ img_dir_te = join(output_folder, "imagesTs")
63
+ maybe_mkdir_p(img_dir)
64
+ maybe_mkdir_p(lab_dir)
65
+ maybe_mkdir_p(img_dir_te)
66
+
67
+
68
+ def load_save_train(args):
69
+ data_file, seg_file = args
70
+ pat_id = data_file.split("/")[-1]
71
+ pat_id = "train_" + pat_id.split("-")[-1][:-4]
72
+
73
+ img_itk = sitk.ReadImage(data_file)
74
+ sitk.WriteImage(img_itk, join(img_dir, pat_id + "_0000.nii.gz"))
75
+
76
+ img_itk = sitk.ReadImage(seg_file)
77
+ sitk.WriteImage(img_itk, join(lab_dir, pat_id + ".nii.gz"))
78
+ return pat_id
79
+
80
+ def load_save_test(args):
81
+ data_file = args
82
+ pat_id = data_file.split("/")[-1]
83
+ pat_id = "test_" + pat_id.split("-")[-1][:-4]
84
+
85
+ img_itk = sitk.ReadImage(data_file)
86
+ sitk.WriteImage(img_itk, join(img_dir_te, pat_id + "_0000.nii.gz"))
87
+ return pat_id
88
+
89
+ nii_files_tr_data = subfiles(train_dir, True, "volume", "nii", True)
90
+ nii_files_tr_seg = subfiles(train_dir, True, "segmen", "nii", True)
91
+
92
+ nii_files_ts = subfiles(test_dir, True, "test-volume", "nii", True)
93
+
94
+ p = Pool(default_num_threads)
95
+ train_ids = p.map(load_save_train, zip(nii_files_tr_data, nii_files_tr_seg))
96
+ test_ids = p.map(load_save_test, nii_files_ts)
97
+ p.close()
98
+ p.join()
99
+
100
+ json_dict = OrderedDict()
101
+ json_dict['name'] = "LITS"
102
+ json_dict['description'] = "LITS"
103
+ json_dict['tensorImageSize'] = "4D"
104
+ json_dict['reference'] = "see challenge website"
105
+ json_dict['licence'] = "see challenge website"
106
+ json_dict['release'] = "0.0"
107
+ json_dict['modality'] = {
108
+ "0": "CT"
109
+ }
110
+
111
+ json_dict['labels'] = {
112
+ "0": "background",
113
+ "1": "liver",
114
+ "2": "tumor"
115
+ }
116
+
117
+ json_dict['numTraining'] = len(train_ids)
118
+ json_dict['numTest'] = len(test_ids)
119
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in train_ids]
120
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_ids]
121
+
122
+ with open(os.path.join(output_folder, "dataset.json"), 'w') as f:
123
+ json.dump(json_dict, f, indent=4, sort_keys=True)
nnunet/dataset_conversion/Task032_BraTS_2018.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from multiprocessing.pool import Pool
15
+
16
+ import numpy as np
17
+ from collections import OrderedDict
18
+
19
+ from batchgenerators.utilities.file_and_folder_operations import *
20
+ from nnunet.dataset_conversion.Task043_BraTS_2019 import copy_BraTS_segmentation_and_convert_labels
21
+ from nnunet.paths import nnUNet_raw_data
22
+ import SimpleITK as sitk
23
+ import shutil
24
+
25
+
26
def convert_labels_back_to_BraTS(seg: np.ndarray):
    """Map nnU-Net labels (1=edema, 2=non-enhancing, 3=enhancing) back to the
    BraTS convention (2, 1, 4 respectively); background stays 0."""
    restored = np.zeros_like(seg)
    for nnunet_label, brats_label in ((1, 2), (3, 4), (2, 1)):
        restored[seg == nnunet_label] = brats_label
    return restored
+ return new_seg
32
+
33
+
34
def load_convert_save(filename, input_folder, output_folder):
    """Read one prediction, map its labels back to the BraTS convention and
    save it under the same name in output_folder."""
    source_img = sitk.ReadImage(join(input_folder, filename))
    relabeled = convert_labels_back_to_BraTS(sitk.GetArrayFromImage(source_img))
    target_img = sitk.GetImageFromArray(relabeled)
    target_img.CopyInformation(source_img)
    sitk.WriteImage(target_img, join(output_folder, filename))
41
+
42
+
43
def convert_labels_back_to_BraTS_2018_2019_convention(input_folder: str, output_folder: str, num_processes: int = 12):
    """
    reads all prediction files (nifti) in the input folder, converts the labels back to BraTS convention and saves the
    result in output_folder
    :param input_folder:
    :param output_folder:
    :return:
    """
    maybe_mkdir_p(output_folder)
    predictions = subfiles(input_folder, suffix='.nii.gz', join=False)
    pool = Pool(num_processes)
    # fan the per-file conversion out over the worker pool
    pool.starmap(load_convert_save,
                 [(fname, input_folder, output_folder) for fname in predictions])
    pool.close()
    pool.join()
57
+
58
+
59
+ if __name__ == "__main__":
60
+ """
61
+ REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
62
+ """
63
+
64
+ task_name = "Task032_BraTS2018"
65
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Training"
66
+
67
+ target_base = join(nnUNet_raw_data, task_name)
68
+ target_imagesTr = join(target_base, "imagesTr")
69
+ target_imagesVal = join(target_base, "imagesVal")
70
+ target_imagesTs = join(target_base, "imagesTs")
71
+ target_labelsTr = join(target_base, "labelsTr")
72
+
73
+ maybe_mkdir_p(target_imagesTr)
74
+ maybe_mkdir_p(target_imagesVal)
75
+ maybe_mkdir_p(target_imagesTs)
76
+ maybe_mkdir_p(target_labelsTr)
77
+
78
+ patient_names = []
79
+ for tpe in ["HGG", "LGG"]:
80
+ cur = join(downloaded_data_dir, tpe)
81
+ for p in subdirs(cur, join=False):
82
+ patdir = join(cur, p)
83
+ patient_name = tpe + "__" + p
84
+ patient_names.append(patient_name)
85
+ t1 = join(patdir, p + "_t1.nii.gz")
86
+ t1c = join(patdir, p + "_t1ce.nii.gz")
87
+ t2 = join(patdir, p + "_t2.nii.gz")
88
+ flair = join(patdir, p + "_flair.nii.gz")
89
+ seg = join(patdir, p + "_seg.nii.gz")
90
+
91
+ assert all([
92
+ isfile(t1),
93
+ isfile(t1c),
94
+ isfile(t2),
95
+ isfile(flair),
96
+ isfile(seg)
97
+ ]), "%s" % patient_name
98
+
99
+ shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
100
+ shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
101
+ shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
102
+ shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
103
+
104
+ copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
105
+
106
+ json_dict = OrderedDict()
107
+ json_dict['name'] = "BraTS2018"
108
+ json_dict['description'] = "nothing"
109
+ json_dict['tensorImageSize'] = "4D"
110
+ json_dict['reference'] = "see BraTS2018"
111
+ json_dict['licence'] = "see BraTS2019 license"
112
+ json_dict['release'] = "0.0"
113
+ json_dict['modality'] = {
114
+ "0": "T1",
115
+ "1": "T1ce",
116
+ "2": "T2",
117
+ "3": "FLAIR"
118
+ }
119
+ json_dict['labels'] = {
120
+ "0": "background",
121
+ "1": "edema",
122
+ "2": "non-enhancing",
123
+ "3": "enhancing",
124
+ }
125
+ json_dict['numTraining'] = len(patient_names)
126
+ json_dict['numTest'] = 0
127
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
128
+ patient_names]
129
+ json_dict['test'] = []
130
+
131
+ save_json(json_dict, join(target_base, "dataset.json"))
132
+
133
+ del tpe, cur
134
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Validation"
135
+
136
+ for p in subdirs(downloaded_data_dir, join=False):
137
+ patdir = join(downloaded_data_dir, p)
138
+ patient_name = p
139
+ t1 = join(patdir, p + "_t1.nii.gz")
140
+ t1c = join(patdir, p + "_t1ce.nii.gz")
141
+ t2 = join(patdir, p + "_t2.nii.gz")
142
+ flair = join(patdir, p + "_flair.nii.gz")
143
+
144
+ assert all([
145
+ isfile(t1),
146
+ isfile(t1c),
147
+ isfile(t2),
148
+ isfile(flair),
149
+ ]), "%s" % patient_name
150
+
151
+ shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
152
+ shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
153
+ shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
154
+ shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))
155
+
156
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Testing_FIsensee"
157
+
158
+ for p in subdirs(downloaded_data_dir, join=False):
159
+ patdir = join(downloaded_data_dir, p)
160
+ patient_name = p
161
+ t1 = join(patdir, p + "_t1.nii.gz")
162
+ t1c = join(patdir, p + "_t1ce.nii.gz")
163
+ t2 = join(patdir, p + "_t2.nii.gz")
164
+ flair = join(patdir, p + "_flair.nii.gz")
165
+
166
+ assert all([
167
+ isfile(t1),
168
+ isfile(t1c),
169
+ isfile(t2),
170
+ isfile(flair),
171
+ ]), "%s" % patient_name
172
+
173
+ shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
174
+ shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
175
+ shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
176
+ shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))
nnunet/dataset_conversion/Task035_ISBI_MSLesionSegmentationChallenge.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import shutil
16
+ from collections import OrderedDict
17
+ import numpy as np
18
+ import SimpleITK as sitk
19
+ import multiprocessing
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+
22
+
23
def convert_to_nii_gz(filename):
    """Convert an image file to compressed NIfTI (.nii.gz) and delete the original."""
    image = sitk.ReadImage(filename)
    stem, _ = os.path.splitext(filename)
    sitk.WriteImage(image, stem + ".nii.gz")
    os.remove(filename)
27
+
28
+
29
def convert_for_submission(source_dir, target_dir):
    """Rename nnU-Net predictions to the ISBI MS lesion challenge submission scheme."""
    maybe_mkdir_p(target_dir)
    for fname in subfiles(source_dir, suffix=".nii.gz", join=False):
        # file names look like case__<id>__<timestep>.nii.gz
        parts = fname.split("__")
        case_id = int(parts[1])
        timestep = int(parts[2][:-7])
        target = join(target_dir, "test%02d_%02d_nnUNet.nii" % (case_id, timestep))
        image = sitk.ReadImage(join(source_dir, fname))
        sitk.WriteImage(image, target)
39
+
40
+
41
+ if __name__ == "__main__":
42
+ # convert to nifti.gz
43
+ dirs = ['/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTr',
44
+ '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs',
45
+ '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/labelsTr']
46
+
47
+ p = multiprocessing.Pool(3)
48
+
49
+ for d in dirs:
50
+ nii_files = subfiles(d, suffix='.nii')
51
+ p.map(convert_to_nii_gz, nii_files)
52
+
53
+ p.close()
54
+ p.join()
55
+
56
+
57
+ def rename_files(folder):
58
+ all_files = subfiles(folder, join=False)
59
+ # there are max 14 patients per folder, starting with 1
60
+ for patientid in range(1, 15):
61
+ # there are certainly no more than 10 time steps per patient, starting with 1
62
+ for t in range(1, 10):
63
+ patient_files = [i for i in all_files if i.find("%02.0d_%02.0d_" % (patientid, t)) != -1]
64
+ if not len(patient_files) == 4:
65
+ continue
66
+
67
+ flair_file = [i for i in patient_files if i.endswith("_flair_pp.nii.gz")][0]
68
+ mprage_file = [i for i in patient_files if i.endswith("_mprage_pp.nii.gz")][0]
69
+ pd_file = [i for i in patient_files if i.endswith("_pd_pp.nii.gz")][0]
70
+ t2_file = [i for i in patient_files if i.endswith("_t2_pp.nii.gz")][0]
71
+
72
+ os.rename(join(folder, flair_file), join(folder, "case__%02.0d__%02.0d_0000.nii.gz" % (patientid, t)))
73
+ os.rename(join(folder, mprage_file), join(folder, "case__%02.0d__%02.0d_0001.nii.gz" % (patientid, t)))
74
+ os.rename(join(folder, pd_file), join(folder, "case__%02.0d__%02.0d_0002.nii.gz" % (patientid, t)))
75
+ os.rename(join(folder, t2_file), join(folder, "case__%02.0d__%02.0d_0003.nii.gz" % (patientid, t)))
76
+
77
+
78
+ for d in dirs[:-1]:
79
+ rename_files(d)
80
+
81
+
82
+ # now we have to deal with the training masks, we do it the quick and dirty way here by just creating copies of the
83
+ # training data
84
+
85
+ train_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTr'
86
+
87
+ for patientid in range(1, 6):
88
+ for t in range(1, 6):
89
+ fnames_original = subfiles(train_folder, prefix="case__%02.0d__%02.0d" % (patientid, t), suffix=".nii.gz", sort=True)
90
+ for f in fnames_original:
91
+ for mask in [1, 2]:
92
+ fname_target = f[:-12] + "__mask%d" % mask + f[-12:]
93
+ shutil.copy(f, fname_target)
94
+ os.remove(f)
95
+
96
+
97
+ labels_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/labelsTr'
98
+
99
+ for patientid in range(1, 6):
100
+ for t in range(1, 6):
101
+ for mask in [1, 2]:
102
+ f = join(labels_folder, "training%02d_%02d_mask%d.nii.gz" % (patientid, t, mask))
103
+ if isfile(f):
104
+ os.rename(f, join(labels_folder, "case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask)))
105
+
106
+
107
+
108
+ tr_files = []
109
+ for patientid in range(1, 6):
110
+ for t in range(1, 6):
111
+ for mask in [1, 2]:
112
+ if isfile(join(labels_folder, "case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask))):
113
+ tr_files.append("case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask))
114
+
115
+
116
+ ts_files = []
117
+ for patientid in range(1, 20):
118
+ for t in range(1, 20):
119
+ if isfile(join("/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs",
120
+ "case__%02.0d__%02.0d_0000.nii.gz" % (patientid, t))):
121
+ ts_files.append("case__%02.0d__%02.0d.nii.gz" % (patientid, t))
122
+
123
+
124
+ out_base = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/'
125
+
126
+ json_dict = OrderedDict()
127
+ json_dict['name'] = "ISBI_Lesion_Segmentation_Challenge_2015"
128
+ json_dict['description'] = "nothing"
129
+ json_dict['tensorImageSize'] = "4D"
130
+ json_dict['reference'] = "see challenge website"
131
+ json_dict['licence'] = "see challenge website"
132
+ json_dict['release'] = "0.0"
133
+ json_dict['modality'] = {
134
+ "0": "flair",
135
+ "1": "mprage",
136
+ "2": "pd",
137
+ "3": "t2"
138
+ }
139
+ json_dict['labels'] = {
140
+ "0": "background",
141
+ "1": "lesion"
142
+ }
143
+ json_dict['numTraining'] = len(subfiles(labels_folder))
144
+ json_dict['numTest'] = len(subfiles('/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs')) // 4
145
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i[:-7], "label": "./labelsTr/%s.nii.gz" % i[:-7]} for i in
146
+ tr_files]
147
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i[:-7] for i in ts_files]
148
+
149
+ save_json(json_dict, join(out_base, "dataset.json"))
150
+
151
+ case_identifiers = np.unique([i[:-12] for i in subfiles("/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task035_ISBILesionSegmentation/imagesTr", suffix='.nii.gz', join=False)])
152
+
153
+ splits = []
154
+ for f in range(5):
155
+ cases = [i for i in range(1, 6) if i != f+1]
156
+ splits.append(OrderedDict())
157
+ splits[-1]['val'] = np.array([i for i in case_identifiers if i.startswith("case__%02d__" % (f + 1))])
158
+ remaining = [i for i in case_identifiers if i not in splits[-1]['val']]
159
+ splits[-1]['train'] = np.array(remaining)
160
+
161
+ maybe_mkdir_p("/media/fabian/nnunet/Task035_ISBILesionSegmentation")
162
+ save_pickle(splits, join("/media/fabian/nnunet/Task035_ISBILesionSegmentation", "splits_final.pkl"))
nnunet/dataset_conversion/Task037_038_Chaos_Challenge.py ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from PIL import Image
17
+ import shutil
18
+ from collections import OrderedDict
19
+
20
+ import dicom2nifti
21
+ import numpy as np
22
+ from batchgenerators.utilities.data_splitting import get_split_deterministic
23
+ from batchgenerators.utilities.file_and_folder_operations import *
24
+ from PIL import Image
25
+ import SimpleITK as sitk
26
+ from nnunet.paths import preprocessing_output_dir, nnUNet_raw_data
27
+ from nnunet.utilities.sitk_stuff import copy_geometry
28
+ from nnunet.inference.ensemble_predictions import merge
29
+
30
+
31
def load_png_stack(folder):
    """Load every PNG in *folder* (sorted by filename) into one 3D array.

    The stacked volume is reversed along the slice axis, i.e. the first
    entry of the returned array corresponds to the alphabetically last file.

    :param folder: directory containing the PNG slices
    :return: np.ndarray of shape (num_slices, H, W)
    """
    slice_files = sorted(subfiles(folder, suffix="png"))
    slices = [np.array(Image.open(fname)) for fname in slice_files]
    return np.stack(slices, 0)[::-1]
39
+
40
+
41
def convert_CT_seg(loaded_png):
    """CT ground truth needs no value remapping -- just cast the stack to uint16."""
    return loaded_png.astype("uint16")
43
+
44
+
45
def convert_MR_seg(loaded_png):
    """Map CHAOS MR ground-truth intensity ranges to consecutive organ labels.

    Intensity bands (exclusive lower, inclusive upper) encode the organs:
    (55, 70] -> 1 liver, (110, 135] -> 2 right kidney,
    (175, 200] -> 3 left kidney, (240, 255] -> 4 spleen.
    Anything outside these bands stays 0 (background).

    :param loaded_png: array of ground-truth intensities
    :return: float array of the same shape holding labels 0-4
    """
    intensity_ranges = ((55, 70, 1), (110, 135, 2), (175, 200, 3), (240, 255, 4))
    result = np.zeros(loaded_png.shape)
    for lower, upper, organ_label in intensity_ranges:
        result[(loaded_png > lower) & (loaded_png <= upper)] = organ_label
    return result
52
+
53
+
54
def convert_seg_to_intensity_task5(seg):
    """Convert labels 1-4 to the intensity coding the CHAOS Task 5 PNG export
    expects (1 -> 63, 2 -> 126, 3 -> 189, 4 -> 252); all other values become 0.

    :param seg: label array (0-4)
    :return: uint8 array of the same shape with submission intensities
    """
    label_to_intensity = {1: 63, 2: 126, 3: 189, 4: 252}
    seg_new = np.zeros(seg.shape, dtype=np.uint8)
    for label_id, intensity in label_to_intensity.items():
        seg_new[seg == label_id] = intensity
    return seg_new
61
+
62
+
63
def convert_seg_to_intensity_task3(seg):
    """Binary CHAOS Task 3 PNG export: label 1 -> intensity 63, everything else -> 0.

    :param seg: label array
    :return: uint8 array of the same shape
    """
    return np.where(seg == 1, 63, 0).astype(np.uint8)
67
+
68
+
69
def write_pngs_from_nifti(nifti, output_folder, converter=convert_seg_to_intensity_task3):
    """Export a nifti segmentation as one PNG per slice (CHAOS submission format).

    :param nifti: path to the segmentation nifti file
    :param output_folder: destination folder for the PNGs (not created here --
        it must already exist)
    :param converter: function mapping label ids to submission intensities
    """
    npy = sitk.GetArrayFromImage(sitk.ReadImage(nifti))
    seg_new = converter(npy)
    for z in range(len(npy)):
        # slice index is zero-padded to three digits: img000.png, img001.png, ...
        Image.fromarray(seg_new[z]).save(join(output_folder, "img%03.0d.png" % z))
74
+
75
+
76
def convert_variant2_predicted_test_to_submission_format(folder_with_predictions,
                                                         output_folder="/home/fabian/drives/datasets/results/nnUNet/test_sets/Task038_CHAOS_Task_3_5_Variant2/ready_to_submit",
                                                         postprocessing_file="/home/fabian/drives/datasets/results/nnUNet/ensembles/Task038_CHAOS_Task_3_5_Variant2/ensemble_2d__nnUNetTrainerV2__nnUNetPlansv2.1--3d_fullres__nnUNetTrainerV2__nnUNetPlansv2.1/postprocessing.json"):
    """
    Convert Variant-2 test-set predictions to the CHAOS submission format.

    Ensembles the two T1 phase predictions (in/out) per patient, merges them
    with the T2 predictions, applies postprocessing and finally writes the
    PNG stacks for Task 3 and Task 5 into the extracted submission template.

    output_folder is where the extracted template is
    :param folder_with_predictions: folder with the raw test-set predictions (npz/pkl/nii.gz)
    :param output_folder:
    :param postprocessing_file: NOTE(review): this argument is immediately
        overwritten by the hardcoded path below, so the default above is dead.
    :return:
    """
    postprocessing_file = "/media/fabian/Results/nnUNet/3d_fullres/Task039_CHAOS_Task_3_5_Variant2_highres/" \
                          "nnUNetTrainerV2__nnUNetPlansfixed/postprocessing.json"

    # variant 2 treats in and out phase as two training examples, so we need to ensemble these two again
    final_predictions_folder = join(output_folder, "final")
    maybe_mkdir_p(final_predictions_folder)
    # patient id is the last "_"-separated token of the filename, minus ".nii.gz"
    t1_patient_names = [i.split("_")[-1][:-7] for i in subfiles(folder_with_predictions, prefix="T1", suffix=".nii.gz", join=False)]
    folder_for_ensembing0 = join(output_folder, "ens0")
    folder_for_ensembing1 = join(output_folder, "ens1")
    maybe_mkdir_p(folder_for_ensembing0)
    maybe_mkdir_p(folder_for_ensembing1)
    # now copy all t1 out phases in ens0 and all in phases in ens1. Name them the same.
    for t1 in t1_patient_names:
        shutil.copy(join(folder_with_predictions, "T1_in_%s.npz" % t1), join(folder_for_ensembing1, "T1_%s.npz" % t1))
        shutil.copy(join(folder_with_predictions, "T1_in_%s.pkl" % t1), join(folder_for_ensembing1, "T1_%s.pkl" % t1))
        shutil.copy(join(folder_with_predictions, "T1_out_%s.npz" % t1), join(folder_for_ensembing0, "T1_%s.npz" % t1))
        shutil.copy(join(folder_with_predictions, "T1_out_%s.pkl" % t1), join(folder_for_ensembing0, "T1_%s.pkl" % t1))
    shutil.copy(join(folder_with_predictions, "plans.pkl"), join(folder_for_ensembing0, "plans.pkl"))
    shutil.copy(join(folder_with_predictions, "plans.pkl"), join(folder_for_ensembing1, "plans.pkl"))

    # there is a problem with T1_35 that I need to correct manually (different crop size, will not negatively impact results)
    #ens0_softmax = np.load(join(folder_for_ensembing0, "T1_35.npz"))['softmax']
    ens1_softmax = np.load(join(folder_for_ensembing1, "T1_35.npz"))['softmax']
    #ens0_props = load_pickle(join(folder_for_ensembing0, "T1_35.pkl"))
    #ens1_props = load_pickle(join(folder_for_ensembing1, "T1_35.pkl"))
    # drop the last entry of the third axis so both ensemble members have matching shapes
    ens1_softmax = ens1_softmax[:, :, :-1, :]
    np.savez_compressed(join(folder_for_ensembing1, "T1_35.npz"), softmax=ens1_softmax)
    shutil.copy(join(folder_for_ensembing0, "T1_35.pkl"), join(folder_for_ensembing1, "T1_35.pkl"))

    # now call my ensemble function
    merge((folder_for_ensembing0, folder_for_ensembing1), final_predictions_folder, 8, True,
          postprocessing_file=postprocessing_file)
    # copy t2 files to final_predictions_folder as well
    t2_files = subfiles(folder_with_predictions, prefix="T2", suffix=".nii.gz", join=False)
    for t2 in t2_files:
        shutil.copy(join(folder_with_predictions, t2), join(final_predictions_folder, t2))

    # apply postprocessing
    from nnunet.postprocessing.connected_components import apply_postprocessing_to_folder, load_postprocessing
    postprocessed_folder = join(output_folder, "final_postprocessed")
    for_which_classes, min_valid_obj_size = load_postprocessing(postprocessing_file)
    apply_postprocessing_to_folder(final_predictions_folder, postprocessed_folder,
                                   for_which_classes, min_valid_obj_size, 8)

    # now export the niftis in the weird png format
    # task 3
    output_dir = join(output_folder, "CHAOS_submission_template_new", "Task3", "MR")
    for t1 in t1_patient_names:
        output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
        nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
        write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task3)
    for t2 in t2_files:
        patname = t2.split("_")[-1][:-7]
        output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
        nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
        write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task3)

    # task 5
    output_dir = join(output_folder, "CHAOS_submission_template_new", "Task5", "MR")
    for t1 in t1_patient_names:
        output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
        nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
        write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task5)
    for t2 in t2_files:
        patname = t2.split("_")[-1][:-7]
        output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
        nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
        write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task5)
153
+
154
+
155
+
156
if __name__ == "__main__":
    """
    This script only prepares data to participate in Task 5 and Task 5. I don't like the CT task because
    1) there are
    no abdominal organs in the ground truth. In the case of CT we are supposed to train only liver while on MRI we are
    supposed to train all organs. This would require manual modification of nnU-net to deal with this dataset. This is
    not what nnU-net is about.
    2) CT Liver or multiorgan segmentation is too easy to get external data for. Therefore the challenges comes down
    to who gets the b est external data, not who has the best algorithm. Not super interesting.

    Task 3 is a subtask of Task 5 so we need to prepare the data only once.
    Difficulty: We need to process both T1 and T2, but T1 has 2 'modalities' (phases). nnU-Net cannot handly varying
    number of input channels. We need to be creative.
    We deal with this by preparing 2 Variants:
    1) pretend we have 2 modalities for T2 as well by simply stacking a copy of the data
    2) treat all MRI sequences independently, so we now have 3*20 training data instead of 2*20. In inference we then
    ensemble the results for the two t1 modalities.

    Careful: We need to split manually here to ensure we stratify by patient
    """

    root = "/media/fabian/My Book/datasets/CHAOS_challenge/Train_Sets"
    root_test = "/media/fabian/My Book/datasets/CHAOS_challenge/Test_Sets"
    out_base = nnUNet_raw_data
    # CT
    # we ignore CT because

    ##############################################################
    # Variant 1
    ##############################################################
    patient_ids = []
    patient_ids_test = []

    output_folder = join(out_base, "Task037_CHAOS_Task_3_5_Variant1")
    output_images = join(output_folder, "imagesTr")
    output_labels = join(output_folder, "labelsTr")
    output_imagesTs = join(output_folder, "imagesTs")
    maybe_mkdir_p(output_images)
    maybe_mkdir_p(output_labels)
    maybe_mkdir_p(output_imagesTs)


    # Process T1 train
    d = join(root, "MR")
    patients = subdirs(d, join=False)
    for p in patients:
        patient_name = "T1_" + p
        gt_dir = join(d, p, "T1DUAL", "Ground")
        # load_png_stack already reverses the slice axis; the [::-1] here
        # reverses it back, so the segmentation keeps the original file order
        seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])

        # in-phase becomes channel 0000
        img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
        img_outfile = join(output_images, patient_name + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        # out-phase becomes channel 0001
        img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
        img_outfile = join(output_images, patient_name + "_0001.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        # geometry for the label is copied from the last converted image (out-phase)
        img_sitk = sitk.ReadImage(img_outfile)
        img_sitk_npy = sitk.GetArrayFromImage(img_sitk)  # NOTE(review): never used
        seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
        seg_itk = copy_geometry(seg_itk, img_sitk)
        sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
        patient_ids.append(patient_name)

    # Process T1 test
    d = join(root_test, "MR")
    patients = subdirs(d, join=False)
    for p in patients:
        patient_name = "T1_" + p

        img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
        img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
        img_outfile = join(output_imagesTs, patient_name + "_0001.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        img_sitk = sitk.ReadImage(img_outfile)
        img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
        patient_ids_test.append(patient_name)

    # Process T2 train
    d = join(root, "MR")
    patients = subdirs(d, join=False)
    for p in patients:
        patient_name = "T2_" + p

        gt_dir = join(d, p, "T2SPIR", "Ground")
        seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])

        img_dir = join(d, p, "T2SPIR", "DICOM_anon")
        img_outfile = join(output_images, patient_name + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
        # T2 has only one sequence -> duplicate it as a fake second channel (Variant 1)
        shutil.copy(join(output_images, patient_name + "_0000.nii.gz"), join(output_images, patient_name + "_0001.nii.gz"))

        img_sitk = sitk.ReadImage(img_outfile)
        img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
        seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
        seg_itk = copy_geometry(seg_itk, img_sitk)
        sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
        patient_ids.append(patient_name)

    # Process T2 test
    d = join(root_test, "MR")
    patients = subdirs(d, join=False)
    for p in patients:
        patient_name = "T2_" + p

        gt_dir = join(d, p, "T2SPIR", "Ground")

        img_dir = join(d, p, "T2SPIR", "DICOM_anon")
        img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
        shutil.copy(join(output_imagesTs, patient_name + "_0000.nii.gz"), join(output_imagesTs, patient_name + "_0001.nii.gz"))

        img_sitk = sitk.ReadImage(img_outfile)
        img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
        patient_ids_test.append(patient_name)

    # write dataset.json for Variant 1 (test cases intentionally not listed)
    json_dict = OrderedDict()
    json_dict['name'] = "Chaos Challenge Task3/5 Variant 1"
    json_dict['description'] = "nothing"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "https://chaos.grand-challenge.org/Data/"
    json_dict['licence'] = "see https://chaos.grand-challenge.org/Data/"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "MRI",
        "1": "MRI",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "liver",
        "2": "right kidney",
        "3": "left kidney",
        "4": "spleen",
    }
    json_dict['numTraining'] = len(patient_ids)
    json_dict['numTest'] = 0
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
                             patient_ids]
    json_dict['test'] = []

    save_json(json_dict, join(output_folder, "dataset.json"))
302
+
303
    ##############################################################
    # Variant 2
    ##############################################################
    # Variant 2 treats every MR sequence (T1 in-phase, T1 out-phase, T2) as its
    # own single-channel training case.

    patient_ids = []
    patient_ids_test = []

    output_folder = join(out_base, "Task038_CHAOS_Task_3_5_Variant2")
    output_images = join(output_folder, "imagesTr")
    output_imagesTs = join(output_folder, "imagesTs")
    output_labels = join(output_folder, "labelsTr")
    maybe_mkdir_p(output_images)
    maybe_mkdir_p(output_imagesTs)
    maybe_mkdir_p(output_labels)

    # Process T1 train
    d = join(root, "MR")
    patients = subdirs(d, join=False)
    for p in patients:
        patient_name_in = "T1_in_" + p
        patient_name_out = "T1_out_" + p
        gt_dir = join(d, p, "T1DUAL", "Ground")
        seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])

        img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
        img_outfile = join(output_images, patient_name_in + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
        img_outfile = join(output_images, patient_name_out + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        # the same ground truth is written once per phase (geometry from the out-phase image)
        img_sitk = sitk.ReadImage(img_outfile)
        img_sitk_npy = sitk.GetArrayFromImage(img_sitk)  # NOTE(review): never used
        seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
        seg_itk = copy_geometry(seg_itk, img_sitk)
        sitk.WriteImage(seg_itk, join(output_labels, patient_name_in + ".nii.gz"))
        sitk.WriteImage(seg_itk, join(output_labels, patient_name_out + ".nii.gz"))
        patient_ids.append(patient_name_out)
        patient_ids.append(patient_name_in)

    # Process T1 test
    d = join(root_test, "MR")
    patients = subdirs(d, join=False)
    for p in patients:
        patient_name_in = "T1_in_" + p
        patient_name_out = "T1_out_" + p
        gt_dir = join(d, p, "T1DUAL", "Ground")

        img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
        img_outfile = join(output_imagesTs, patient_name_in + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
        img_outfile = join(output_imagesTs, patient_name_out + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        img_sitk = sitk.ReadImage(img_outfile)
        img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
        patient_ids_test.append(patient_name_out)
        patient_ids_test.append(patient_name_in)

    # Process T2 train
    d = join(root, "MR")
    patients = subdirs(d, join=False)
    for p in patients:
        patient_name = "T2_" + p

        gt_dir = join(d, p, "T2SPIR", "Ground")
        seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])

        img_dir = join(d, p, "T2SPIR", "DICOM_anon")
        img_outfile = join(output_images, patient_name + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        img_sitk = sitk.ReadImage(img_outfile)
        img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
        seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
        seg_itk = copy_geometry(seg_itk, img_sitk)
        sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
        patient_ids.append(patient_name)

    # Process T2 test
    d = join(root_test, "MR")
    patients = subdirs(d, join=False)
    for p in patients:
        patient_name = "T2_" + p

        gt_dir = join(d, p, "T2SPIR", "Ground")

        img_dir = join(d, p, "T2SPIR", "DICOM_anon")
        img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
        _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)

        img_sitk = sitk.ReadImage(img_outfile)
        img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
        patient_ids_test.append(patient_name)

    # write dataset.json for Variant 2 (single modality)
    json_dict = OrderedDict()
    json_dict['name'] = "Chaos Challenge Task3/5 Variant 2"
    json_dict['description'] = "nothing"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "https://chaos.grand-challenge.org/Data/"
    json_dict['licence'] = "see https://chaos.grand-challenge.org/Data/"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "MRI",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "liver",
        "2": "right kidney",
        "3": "left kidney",
        "4": "spleen",
    }
    json_dict['numTraining'] = len(patient_ids)
    json_dict['numTest'] = 0
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
                             patient_ids]
    json_dict['test'] = []

    save_json(json_dict, join(output_folder, "dataset.json"))

    #################################################
    # custom split
    #################################################
    # splits are computed on the patient level so that all sequences of one
    # patient end up in the same fold (stratified by patient, see docstring)
    patients = subdirs(join(root, "MR"), join=False)
    task_name_variant1 = "Task037_CHAOS_Task_3_5_Variant1"
    task_name_variant2 = "Task038_CHAOS_Task_3_5_Variant2"

    output_preprocessed_v1 = join(preprocessing_output_dir, task_name_variant1)
    maybe_mkdir_p(output_preprocessed_v1)

    output_preprocessed_v2 = join(preprocessing_output_dir, task_name_variant2)
    maybe_mkdir_p(output_preprocessed_v2)

    splits = []
    for fold in range(5):
        tr, val = get_split_deterministic(patients, fold, 5, 12345)
        train = ["T2_" + i for i in tr] + ["T1_" + i for i in tr]
        validation = ["T2_" + i for i in val] + ["T1_" + i for i in val]
        splits.append({
            'train': train,
            'val': validation
        })
    save_pickle(splits, join(output_preprocessed_v1, "splits_final.pkl"))

    splits = []
    for fold in range(5):
        tr, val = get_split_deterministic(patients, fold, 5, 12345)
        train = ["T2_" + i for i in tr] + ["T1_in_" + i for i in tr] + ["T1_out_" + i for i in tr]
        validation = ["T2_" + i for i in val] + ["T1_in_" + i for i in val] + ["T1_out_" + i for i in val]
        splits.append({
            'train': train,
            'val': validation
        })
    save_pickle(splits, join(output_preprocessed_v2, "splits_final.pkl"))
460
+
nnunet/dataset_conversion/Task040_KiTS.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from copy import deepcopy
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ import SimpleITK as sitk
21
+ from multiprocessing import Pool
22
+ from medpy.metric import dc
23
+ import numpy as np
24
+ from nnunet.paths import network_training_output_dir
25
+ from scipy.ndimage import label
26
+
27
+
28
def compute_dice_scores(ref: str, pred: str):
    """Compute the KiTS Dice scores for a single case.

    :param ref: path to the ground-truth nifti (labels: 0 background, 1 kidney, 2 tumor)
    :param pred: path to the predicted nifti
    :return: (kidney_dice, tumor_dice, mean of both). A score is NaN when the
        structure is absent from both reference and prediction.
    """
    ref = sitk.GetArrayFromImage(sitk.ReadImage(ref))
    pred = sitk.GetArrayFromImage(sitk.ReadImage(pred))
    # the kidney score uses all foreground (kidney + tumor)
    kidney_mask_ref = ref > 0
    kidney_mask_pred = pred > 0
    if np.sum(kidney_mask_pred) == 0 and kidney_mask_ref.sum() == 0:
        kidney_dice = np.nan
    else:
        kidney_dice = dc(kidney_mask_pred, kidney_mask_ref)

    tumor_mask_ref = ref == 2
    tumor_mask_pred = pred == 2
    if np.sum(tumor_mask_ref) == 0 and tumor_mask_pred.sum() == 0:
        tumor_dice = np.nan
    else:
        tumor_dice = dc(tumor_mask_ref, tumor_mask_pred)

    # NOTE(review): despite the name this is the arithmetic mean (np.mean), not
    # a geometric mean; it is NaN whenever either individual score is NaN
    geometric_mean = np.mean((kidney_dice, tumor_dice))
    return kidney_dice, tumor_dice, geometric_mean
47
+
48
+
49
def evaluate_folder(folder_gt: str, folder_pred: str):
    """Evaluate all predictions in folder_pred against folder_gt and write results.csv.

    Predictions must have the same filenames as their ground-truth niftis.
    Each csv row is: filename, kidney dice, tumor dice, mean dice.
    Runs 8 worker processes.
    """
    p = Pool(8)
    niftis = subfiles(folder_gt, suffix=".nii.gz", join=False)
    images_gt = [join(folder_gt, i) for i in niftis]
    images_pred = [join(folder_pred, i) for i in niftis]
    results = p.starmap(compute_dice_scores, zip(images_gt, images_pred))
    p.close()
    p.join()

    # results.csv is written next to the predictions
    with open(join(folder_pred, "results.csv"), 'w') as f:
        for i, ni in enumerate(niftis):
            f.write("%s,%0.4f,%0.4f,%0.4f\n" % (ni, *results[i]))
61
+
62
+
63
def remove_all_but_the_two_largest_conn_comp(img_itk_file: str, file_out: str):
    """
    This was not used. I was just curious because others used this. Turns out this is not necessary for my networks

    Keeps only the two largest connected foreground components (presumably the
    two kidneys) and zeroes everything else. If there are at most two
    components the file is copied unchanged.
    """
    img_itk = sitk.ReadImage(img_itk_file)
    img_npy = sitk.GetArrayFromImage(img_itk)

    # binarize (any label > 0 is foreground) and find connected components
    labelmap, num_labels = label((img_npy > 0).astype(int))

    if num_labels > 2:
        label_sizes = []
        for i in range(1, num_labels + 1):
            label_sizes.append(np.sum(labelmap == i))
        argsrt = np.argsort(label_sizes)[::-1] # two largest are now argsrt[0] and argsrt[1]
        keep_mask = (labelmap == argsrt[0] + 1) | (labelmap == argsrt[1] + 1)
        # zero out the original labels everywhere outside the two kept components
        img_npy[~keep_mask] = 0
        new = sitk.GetImageFromArray(img_npy)
        new.CopyInformation(img_itk)
        sitk.WriteImage(new, file_out)
        print(os.path.basename(img_itk_file), num_labels, label_sizes)
    else:
        shutil.copy(img_itk_file, file_out)
85
+
86
+
87
def manual_postprocess(folder_in,
                       folder_out):
    """
    This was not used. I was just curious because others used this. Turns out this is not necessary for my networks

    Applies remove_all_but_the_two_largest_conn_comp to every nifti in
    folder_in and writes the results to folder_out (created if missing).
    """
    maybe_mkdir_p(folder_out)
    infiles = subfiles(folder_in, suffix=".nii.gz", join=False)

    outfiles = [join(folder_out, i) for i in infiles]
    infiles = [join(folder_in, i) for i in infiles]

    p = Pool(8)
    # async submit + get() so that exceptions raised in workers surface here
    _ = p.starmap_async(remove_all_but_the_two_largest_conn_comp, zip(infiles, outfiles))
    _ = _.get()
    p.close()
    p.join()
103
+
104
+
105
+
106
+
107
def copy_npz_fom_valsets():
    '''
    this is preparation for ensembling

    Collects the validation softmax (.npz) files and their property (.pkl)
    files from all 5 cross-validation folds of the listed trainings into one
    crossval_npz folder per training, together with the plans file.
    :return:
    '''
    base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
    folders = ['nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
               'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
               'nnUNetTrainerNewCandidate23__nnUNetPlans']
    for f in folders:
        out = join(base, f, 'crossval_npz')
        maybe_mkdir_p(out)
        shutil.copy(join(base, f, 'plans.pkl'), out)
        for fold in range(5):
            cur = join(base, f, 'fold_%d' % fold, 'validation_raw')
            npz_files = subfiles(cur, suffix='.npz', join=False)
            # every npz must have a matching pkl with the case properties
            pkl_files = [i[:-3] + 'pkl' for i in npz_files]
            assert all([isfile(join(cur, i)) for i in pkl_files])
            for n in npz_files:
                corresponding_pkl = n[:-3] + 'pkl'
                shutil.copy(join(cur, n), out)
                shutil.copy(join(cur, corresponding_pkl), out)
129
+
130
+
131
def ensemble(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
                          'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans'), out_dir="/media/fabian/Results/nnUNet/3d_lowres/Task048_KiTS_clean/ensemble_preactres_and_res"):
    """Merge (ensemble) the collected cross-validation softmax outputs of the
    given experiments into out_dir, using 8 processes."""
    from nnunet.inference.ensemble_predictions import merge
    folders = [join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean", i, 'crossval_npz') for i in experiments]
    merge(folders, out_dir, 8)
136
+
137
+
138
def prepare_submission(fld= "/home/fabian/drives/datasets/results/nnUNet/test_sets/Task048_KiTS_clean/predicted_ens_3d_fullres_3d_cascade_fullres_postprocessed", # '/home/fabian/datasets_fabian/predicted_KiTS_nnUNetTrainerNewCandidate23_FabiansResNet',
                       out='/home/fabian/drives/datasets/results/nnUNet/test_sets/Task048_KiTS_clean/submission'):
    """Copy predictions into the submission folder, renaming 'case' to
    'prediction' in each filename (case_XXXXX.nii.gz -> prediction_XXXXX.nii.gz)."""
    nii = subfiles(fld, join=False, suffix='.nii.gz')
    maybe_mkdir_p(out)
    for n in nii:
        outfname = n.replace('case', 'prediction')
        shutil.copy(join(fld, n), join(out, outfname))
145
+
146
+
147
def pretent_to_be_nnUNetTrainer(base, folds=(0, 1, 2, 3, 4)):
    """
    changes best checkpoint pickle nnunettrainer class name to nnUNetTrainer

    The original trainer name is stashed under 'name_old' so that
    reset_trainerName can undo this change.
    :param base: training output folder containing the fold_X subfolders
    :param folds: which folds to patch
    :return:
    """
    for fold in folds:
        cur = join(base, "fold_%d" % fold)
        pkl_file = join(cur, 'model_best.model.pkl')
        a = load_pickle(pkl_file)
        a['name_old'] = deepcopy(a['name'])
        a['name'] = 'nnUNetTrainer'
        save_pickle(a, pkl_file)
160
+
161
+
162
def reset_trainerName(base, folds=(0, 1, 2, 3, 4)):
    """Undo pretent_to_be_nnUNetTrainer: restore the trainer name saved under
    'name_old' in each fold's best-checkpoint pickle and remove the backup key."""
    for fold in folds:
        cur = join(base, "fold_%d" % fold)
        pkl_file = join(cur, 'model_best.model.pkl')
        a = load_pickle(pkl_file)
        a['name'] = a['name_old']
        del a['name_old']
        save_pickle(a, pkl_file)
170
+
171
+
172
def nnUNetTrainer_these(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
                                     'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
                                     'nnUNetTrainerNewCandidate23__nnUNetPlans')):
    """
    changes best checkpoint pickle nnunettrainer class name to nnUNetTrainer

    Convenience wrapper: applies pretent_to_be_nnUNetTrainer to every listed
    experiment under 3d_lowres/Task048_KiTS_clean.
    :param experiments:
    :return:
    """
    base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
    for exp in experiments:
        cur = join(base, exp)
        pretent_to_be_nnUNetTrainer(cur)
184
+
185
+
186
def reset_trainerName_these(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
                                         'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
                                         'nnUNetTrainerNewCandidate23__nnUNetPlans')):
    """
    changes best checkpoint pickle nnunettrainer class name to nnUNetTrainer

    Convenience wrapper: applies reset_trainerName (the inverse of
    nnUNetTrainer_these) to every listed experiment.
    :param experiments:
    :return:
    """
    base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
    for exp in experiments:
        cur = join(base, exp)
        reset_trainerName(cur)
198
+
199
+
200
if __name__ == "__main__":
    # Convert the raw KiTS2019 download into nnU-Net raw-data layout:
    # cases 0-209 are the official training set (with segmentations),
    # cases 210+ are the unlabeled test set.
    base = "/media/fabian/My Book/datasets/KiTS2019_Challenge/kits19/data"
    out = "/media/fabian/My Book/MedicalDecathlon/nnUNet_raw_splitted/Task040_KiTS"
    cases = subdirs(base, join=False)

    maybe_mkdir_p(out)
    maybe_mkdir_p(join(out, "imagesTr"))
    maybe_mkdir_p(join(out, "imagesTs"))
    maybe_mkdir_p(join(out, "labelsTr"))

    train_cases = []
    for c in cases:
        case_id = int(c.split("_")[-1])
        if case_id < 210:
            # training case: image + segmentation
            shutil.copy(join(base, c, "imaging.nii.gz"), join(out, "imagesTr", c + "_0000.nii.gz"))
            shutil.copy(join(base, c, "segmentation.nii.gz"), join(out, "labelsTr", c + ".nii.gz"))
            train_cases.append(c)
        else:
            # test case: image only, no ground truth available
            shutil.copy(join(base, c, "imaging.nii.gz"), join(out, "imagesTs", c + "_0000.nii.gz"))

    json_dict = {}
    json_dict['name'] = "KiTS"
    json_dict['description'] = "kidney and kidney tumor segmentation"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "KiTS data for nnunet"
    json_dict['licence'] = ""
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "CT",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "Kidney",
        "2": "Tumor"
    }
    # BUGFIX: only the labeled training cases (id < 210) are listed. The
    # original used all of `cases`, which also put the unlabeled test cases
    # into 'training'/'numTraining' although their labelsTr files do not exist.
    json_dict['numTraining'] = len(train_cases)
    json_dict['numTest'] = 0
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
                             train_cases]
    json_dict['test'] = []

    save_json(json_dict, os.path.join(out, "dataset.json"))
240
+
nnunet/dataset_conversion/Task043_BraTS_2019.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy as np
17
+ from collections import OrderedDict
18
+
19
+ from batchgenerators.utilities.file_and_folder_operations import *
20
+ from nnunet.paths import nnUNet_raw_data
21
+ import SimpleITK as sitk
22
+ import shutil
23
+
24
+
25
+ def copy_BraTS_segmentation_and_convert_labels(in_file, out_file):
26
+ # use this for segmentation only!!!
27
+ # nnUNet wants the labels to be continuous. BraTS is 0, 1, 2, 4 -> we make that into 0, 1, 2, 3
28
+ img = sitk.ReadImage(in_file)
29
+ img_npy = sitk.GetArrayFromImage(img)
30
+
31
+ uniques = np.unique(img_npy)
32
+ for u in uniques:
33
+ if u not in [0, 1, 2, 4]:
34
+ raise RuntimeError('unexpected label')
35
+
36
+ seg_new = np.zeros_like(img_npy)
37
+ seg_new[img_npy == 4] = 3
38
+ seg_new[img_npy == 2] = 1
39
+ seg_new[img_npy == 1] = 2
40
+ img_corr = sitk.GetImageFromArray(seg_new)
41
+ img_corr.CopyInformation(img)
42
+ sitk.WriteImage(img_corr, out_file)
43
+
44
+
45
+ if __name__ == "__main__":
46
+ """
47
+ REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
48
+ """
49
+
50
+ task_name = "Task043_BraTS2019"
51
+ downloaded_data_dir = "/home/sdp/MLPERF/Brats2019_DATA/MICCAI_BraTS_2019_Data_Training"
52
+
53
+ target_base = join(nnUNet_raw_data, task_name)
54
+ target_imagesTr = join(target_base, "imagesTr")
55
+ target_imagesVal = join(target_base, "imagesVal")
56
+ target_imagesTs = join(target_base, "imagesTs")
57
+ target_labelsTr = join(target_base, "labelsTr")
58
+
59
+ maybe_mkdir_p(target_imagesTr)
60
+ maybe_mkdir_p(target_imagesVal)
61
+ maybe_mkdir_p(target_imagesTs)
62
+ maybe_mkdir_p(target_labelsTr)
63
+
64
+ patient_names = []
65
+ for tpe in ["HGG", "LGG"]:
66
+ cur = join(downloaded_data_dir, tpe)
67
+ for p in subdirs(cur, join=False):
68
+ patdir = join(cur, p)
69
+ patient_name = tpe + "__" + p
70
+ patient_names.append(patient_name)
71
+ t1 = join(patdir, p + "_t1.nii.gz")
72
+ t1c = join(patdir, p + "_t1ce.nii.gz")
73
+ t2 = join(patdir, p + "_t2.nii.gz")
74
+ flair = join(patdir, p + "_flair.nii.gz")
75
+ seg = join(patdir, p + "_seg.nii.gz")
76
+
77
+ assert all([
78
+ isfile(t1),
79
+ isfile(t1c),
80
+ isfile(t2),
81
+ isfile(flair),
82
+ isfile(seg)
83
+ ]), "%s" % patient_name
84
+
85
+ shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
86
+ shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
87
+ shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
88
+ shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
89
+
90
+ copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
91
+
92
+
93
+ json_dict = OrderedDict()
94
+ json_dict['name'] = "BraTS2019"
95
+ json_dict['description'] = "nothing"
96
+ json_dict['tensorImageSize'] = "4D"
97
+ json_dict['reference'] = "see BraTS2019"
98
+ json_dict['licence'] = "see BraTS2019 license"
99
+ json_dict['release'] = "0.0"
100
+ json_dict['modality'] = {
101
+ "0": "T1",
102
+ "1": "T1ce",
103
+ "2": "T2",
104
+ "3": "FLAIR"
105
+ }
106
+ json_dict['labels'] = {
107
+ "0": "background",
108
+ "1": "edema",
109
+ "2": "non-enhancing",
110
+ "3": "enhancing",
111
+ }
112
+ json_dict['numTraining'] = len(patient_names)
113
+ json_dict['numTest'] = 0
114
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
115
+ patient_names]
116
+ json_dict['test'] = []
117
+
118
+ save_json(json_dict, join(target_base, "dataset.json"))
119
+
120
+ downloaded_data_dir = "/home/sdp/MLPERF/Brats2019_DATA/MICCAI_BraTS_2019_Data_Validation"
121
+
122
+ for p in subdirs(downloaded_data_dir, join=False):
123
+ patdir = join(downloaded_data_dir, p)
124
+ patient_name = p
125
+ t1 = join(patdir, p + "_t1.nii.gz")
126
+ t1c = join(patdir, p + "_t1ce.nii.gz")
127
+ t2 = join(patdir, p + "_t2.nii.gz")
128
+ flair = join(patdir, p + "_flair.nii.gz")
129
+
130
+ assert all([
131
+ isfile(t1),
132
+ isfile(t1c),
133
+ isfile(t2),
134
+ isfile(flair),
135
+ ]), "%s" % patient_name
136
+
137
+ shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
138
+ shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
139
+ shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
140
+ shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))
141
+
142
+ """
143
+ #I dont have the testing data
144
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Testing_FIsensee"
145
+
146
+ for p in subdirs(downloaded_data_dir, join=False):
147
+ patdir = join(downloaded_data_dir, p)
148
+ patient_name = p
149
+ t1 = join(patdir, p + "_t1.nii.gz")
150
+ t1c = join(patdir, p + "_t1ce.nii.gz")
151
+ t2 = join(patdir, p + "_t2.nii.gz")
152
+ flair = join(patdir, p + "_flair.nii.gz")
153
+
154
+ assert all([
155
+ isfile(t1),
156
+ isfile(t1c),
157
+ isfile(t2),
158
+ isfile(flair),
159
+ ]), "%s" % patient_name
160
+
161
+ shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
162
+ shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
163
+ shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
164
+ shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))"""
nnunet/dataset_conversion/Task055_SegTHOR.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ import SimpleITK as sitk
21
+
22
+
23
+ def convert_for_submission(source_dir, target_dir):
24
+ """
25
+ I believe they want .nii, not .nii.gz
26
+ :param source_dir:
27
+ :param target_dir:
28
+ :return:
29
+ """
30
+ files = subfiles(source_dir, suffix=".nii.gz", join=False)
31
+ maybe_mkdir_p(target_dir)
32
+ for f in files:
33
+ img = sitk.ReadImage(join(source_dir, f))
34
+ out_file = join(target_dir, f[:-7] + ".nii")
35
+ sitk.WriteImage(img, out_file)
36
+
37
+
38
+
39
+ if __name__ == "__main__":
40
+ base = "/media/fabian/DeepLearningData/SegTHOR"
41
+
42
+ task_id = 55
43
+ task_name = "SegTHOR"
44
+
45
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
46
+
47
+ out_base = join(nnUNet_raw_data, foldername)
48
+ imagestr = join(out_base, "imagesTr")
49
+ imagests = join(out_base, "imagesTs")
50
+ labelstr = join(out_base, "labelsTr")
51
+ maybe_mkdir_p(imagestr)
52
+ maybe_mkdir_p(imagests)
53
+ maybe_mkdir_p(labelstr)
54
+
55
+ train_patient_names = []
56
+ test_patient_names = []
57
+ train_patients = subfolders(join(base, "train"), join=False)
58
+ for p in train_patients:
59
+ curr = join(base, "train", p)
60
+ label_file = join(curr, "GT.nii.gz")
61
+ image_file = join(curr, p + ".nii.gz")
62
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
63
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
64
+ train_patient_names.append(p)
65
+
66
+ test_patients = subfiles(join(base, "test"), join=False, suffix=".nii.gz")
67
+ for p in test_patients:
68
+ p = p[:-7]
69
+ curr = join(base, "test")
70
+ image_file = join(curr, p + ".nii.gz")
71
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
72
+ test_patient_names.append(p)
73
+
74
+
75
+ json_dict = OrderedDict()
76
+ json_dict['name'] = "SegTHOR"
77
+ json_dict['description'] = "SegTHOR"
78
+ json_dict['tensorImageSize'] = "4D"
79
+ json_dict['reference'] = "see challenge website"
80
+ json_dict['licence'] = "see challenge website"
81
+ json_dict['release'] = "0.0"
82
+ json_dict['modality'] = {
83
+ "0": "CT",
84
+ }
85
+ json_dict['labels'] = {
86
+ "0": "background",
87
+ "1": "esophagus",
88
+ "2": "heart",
89
+ "3": "trachea",
90
+ "4": "aorta",
91
+ }
92
+ json_dict['numTraining'] = len(train_patient_names)
93
+ json_dict['numTest'] = len(test_patient_names)
94
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
95
+ train_patient_names]
96
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
97
+
98
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task056_VerSe2019.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ import SimpleITK as sitk
18
+ from multiprocessing.pool import Pool
19
+ from nnunet.configuration import default_num_threads
20
+ from nnunet.paths import nnUNet_raw_data
21
+ from batchgenerators.utilities.file_and_folder_operations import *
22
+ import shutil
23
+ from medpy import metric
24
+ import numpy as np
25
+ from nnunet.utilities.image_reorientation import reorient_all_images_in_folder_to_ras
26
+
27
+
28
+ def check_if_all_in_good_orientation(imagesTr_folder: str, labelsTr_folder: str, output_folder: str) -> None:
29
+ maybe_mkdir_p(output_folder)
30
+ filenames = subfiles(labelsTr_folder, suffix='.nii.gz', join=False)
31
+ import matplotlib.pyplot as plt
32
+ for n in filenames:
33
+ img = sitk.GetArrayFromImage(sitk.ReadImage(join(imagesTr_folder, n[:-7] + '_0000.nii.gz')))
34
+ lab = sitk.GetArrayFromImage(sitk.ReadImage(join(labelsTr_folder, n)))
35
+ assert np.all([i == j for i, j in zip(img.shape, lab.shape)])
36
+ z_slice = img.shape[0] // 2
37
+ img_slice = img[z_slice]
38
+ lab_slice = lab[z_slice]
39
+ lab_slice[lab_slice != 0] = 1
40
+ img_slice = img_slice - img_slice.min()
41
+ img_slice = img_slice / img_slice.max()
42
+ stacked = np.vstack((img_slice, lab_slice))
43
+ print(stacked.shape)
44
+ plt.imsave(join(output_folder, n[:-7] + '.png'), stacked, cmap='gray')
45
+
46
+
47
+ def evaluate_verse_case(sitk_file_ref:str, sitk_file_test:str):
48
+ """
49
+ Only vertebra that are present in the reference will be evaluated
50
+ :param sitk_file_ref:
51
+ :param sitk_file_test:
52
+ :return:
53
+ """
54
+ gt_npy = sitk.GetArrayFromImage(sitk.ReadImage(sitk_file_ref))
55
+ pred_npy = sitk.GetArrayFromImage(sitk.ReadImage(sitk_file_test))
56
+ dice_scores = []
57
+ for label in range(1, 26):
58
+ mask_gt = gt_npy == label
59
+ if np.sum(mask_gt) > 0:
60
+ mask_pred = pred_npy == label
61
+ dc = metric.dc(mask_pred, mask_gt)
62
+ else:
63
+ dc = np.nan
64
+ dice_scores.append(dc)
65
+ return dice_scores
66
+
67
+
68
+ def evaluate_verse_folder(folder_pred, folder_gt, out_json="/home/fabian/verse.json"):
69
+ p = Pool(default_num_threads)
70
+ files_gt_bare = subfiles(folder_gt, join=False)
71
+ assert all([isfile(join(folder_pred, i)) for i in files_gt_bare]), "some files are missing in the predicted folder"
72
+ files_pred = [join(folder_pred, i) for i in files_gt_bare]
73
+ files_gt = [join(folder_gt, i) for i in files_gt_bare]
74
+
75
+ results = p.starmap_async(evaluate_verse_case, zip(files_gt, files_pred))
76
+
77
+ results = results.get()
78
+
79
+ dct = {i: j for i, j in zip(files_gt_bare, results)}
80
+
81
+ results_stacked = np.vstack(results)
82
+ results_mean = np.nanmean(results_stacked, 0)
83
+ overall_mean = np.nanmean(results_mean)
84
+
85
+ save_json((dct, list(results_mean), overall_mean), out_json)
86
+ p.close()
87
+ p.join()
88
+
89
+
90
+ def print_unique_labels_and_their_volumes(image: str, print_only_if_vol_smaller_than: float = None):
91
+ img = sitk.ReadImage(image)
92
+ voxel_volume = np.prod(img.GetSpacing())
93
+ img_npy = sitk.GetArrayFromImage(img)
94
+ uniques = [i for i in np.unique(img_npy) if i != 0]
95
+ volumes = {i: np.sum(img_npy == i) * voxel_volume for i in uniques}
96
+ print('')
97
+ print(image.split('/')[-1])
98
+ print('uniques:', uniques)
99
+ for k in volumes.keys():
100
+ v = volumes[k]
101
+ if print_only_if_vol_smaller_than is not None and v > print_only_if_vol_smaller_than:
102
+ pass
103
+ else:
104
+ print('k:', k, '\tvol:', volumes[k])
105
+
106
+
107
+ def remove_label(label_file: str, remove_this: int, replace_with: int = 0):
108
+ img = sitk.ReadImage(label_file)
109
+ img_npy = sitk.GetArrayFromImage(img)
110
+ img_npy[img_npy == remove_this] = replace_with
111
+ img2 = sitk.GetImageFromArray(img_npy)
112
+ img2.CopyInformation(img)
113
+ sitk.WriteImage(img2, label_file)
114
+
115
+
116
+ if __name__ == "__main__":
117
+ ### First we create a nnunet dataset from verse. After this the images will be all willy nilly in their
118
+ # orientation because that's how VerSe comes
119
+ base = '/media/fabian/DeepLearningData/VerSe2019'
120
+ base = "/home/fabian/data/VerSe2019"
121
+
122
+ # correct orientation
123
+ train_files_base = subfiles(join(base, "train"), join=False, suffix="_seg.nii.gz")
124
+ train_segs = [i[:-len("_seg.nii.gz")] + "_seg.nii.gz" for i in train_files_base]
125
+ train_data = [i[:-len("_seg.nii.gz")] + ".nii.gz" for i in train_files_base]
126
+ test_files_base = [i[:-len(".nii.gz")] for i in subfiles(join(base, "test"), join=False, suffix=".nii.gz")]
127
+ test_data = [i + ".nii.gz" for i in test_files_base]
128
+
129
+ task_id = 56
130
+ task_name = "VerSe"
131
+
132
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
133
+
134
+ out_base = join(nnUNet_raw_data, foldername)
135
+ imagestr = join(out_base, "imagesTr")
136
+ imagests = join(out_base, "imagesTs")
137
+ labelstr = join(out_base, "labelsTr")
138
+ maybe_mkdir_p(imagestr)
139
+ maybe_mkdir_p(imagests)
140
+ maybe_mkdir_p(labelstr)
141
+
142
+ train_patient_names = [i[:-len("_seg.nii.gz")] for i in subfiles(join(base, "train"), join=False, suffix="_seg.nii.gz")]
143
+ for p in train_patient_names:
144
+ curr = join(base, "train")
145
+ label_file = join(curr, p + "_seg.nii.gz")
146
+ image_file = join(curr, p + ".nii.gz")
147
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
148
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
149
+
150
+ test_patient_names = [i[:-7] for i in subfiles(join(base, "test"), join=False, suffix=".nii.gz")]
151
+ for p in test_patient_names:
152
+ curr = join(base, "test")
153
+ image_file = join(curr, p + ".nii.gz")
154
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
155
+
156
+
157
+ json_dict = OrderedDict()
158
+ json_dict['name'] = "VerSe2019"
159
+ json_dict['description'] = "VerSe2019"
160
+ json_dict['tensorImageSize'] = "4D"
161
+ json_dict['reference'] = "see challenge website"
162
+ json_dict['licence'] = "see challenge website"
163
+ json_dict['release'] = "0.0"
164
+ json_dict['modality'] = {
165
+ "0": "CT",
166
+ }
167
+ json_dict['labels'] = {i: str(i) for i in range(26)}
168
+
169
+ json_dict['numTraining'] = len(train_patient_names)
170
+ json_dict['numTest'] = len(test_patient_names)
171
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
172
+ train_patient_names]
173
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
174
+
175
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
176
+
177
+ # now we reorient all those images to ras. This saves a pkl with the original affine. We need this information to
178
+ # bring our predictions into the same geometry for submission
179
+ reorient_all_images_in_folder_to_ras(imagestr)
180
+ reorient_all_images_in_folder_to_ras(imagests)
181
+ reorient_all_images_in_folder_to_ras(labelstr)
182
+
183
+ # sanity check
184
+ check_if_all_in_good_orientation(imagestr, labelstr, join(out_base, 'sanitycheck'))
185
+ # looks good to me - proceed
186
+
187
+ # check the volumes of the vertebrae
188
+ _ = [print_unique_labels_and_their_volumes(i, 1000) for i in subfiles(labelstr, suffix='.nii.gz')]
189
+
190
+ # some cases appear fishy. For example, verse063.nii.gz has labels [1, 20, 21, 22, 23, 24] and 1 only has a volume
191
+ # of 63mm^3
192
+
193
+ #let's correct those
194
+
195
+ # 19 is connected to the image border and should not be segmented. Only one slice of 19 is segmented in the
196
+ # reference. Looks wrong
197
+ remove_label(join(labelstr, 'verse031.nii.gz'), 19, 0)
198
+
199
+ # spurious annotation of 18 (vol: 8.00)
200
+ remove_label(join(labelstr, 'verse060.nii.gz'), 18, 0)
201
+
202
+ # spurious annotation of 16 (vol: 3.00)
203
+ remove_label(join(labelstr, 'verse061.nii.gz'), 16, 0)
204
+
205
+ # spurious annotation of 1 (vol: 63.00) although the rest of the vertebra is [20, 21, 22, 23, 24]
206
+ remove_label(join(labelstr, 'verse063.nii.gz'), 1, 0)
207
+
208
+ # spurious annotation of 3 (vol: 9.53) although the rest of the vertebra is
209
+ # [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
210
+ remove_label(join(labelstr, 'verse074.nii.gz'), 3, 0)
211
+
212
+ # spurious annotation of 3 (vol: 15.00)
213
+ remove_label(join(labelstr, 'verse097.nii.gz'), 3, 0)
214
+
215
+ # spurious annotation of 3 (vol: 10) although the rest of the vertebra is
216
+ # [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
217
+ remove_label(join(labelstr, 'verse151.nii.gz'), 3, 0)
218
+
219
+ # spurious annotation of 25 (vol: 4) although the rest of the vertebra is
220
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
221
+ remove_label(join(labelstr, 'verse201.nii.gz'), 25, 0)
222
+
223
+ # spurious annotation of 23 (vol: 8) although the rest of the vertebra is
224
+ # [1, 2, 3, 4, 5, 6, 7, 8]
225
+ remove_label(join(labelstr, 'verse207.nii.gz'), 23, 0)
226
+
227
+ # spurious annotation of 23 (vol: 12) although the rest of the vertebra is
228
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
229
+ remove_label(join(labelstr, 'verse208.nii.gz'), 23, 0)
230
+
231
+ # spurious annotation of 23 (vol: 2) although the rest of the vertebra is
232
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
233
+ remove_label(join(labelstr, 'verse212.nii.gz'), 23, 0)
234
+
235
+ # spurious annotation of 20 (vol: 4) although the rest of the vertebra is
236
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
237
+ remove_label(join(labelstr, 'verse214.nii.gz'), 20, 0)
238
+
239
+ # spurious annotation of 23 (vol: 15) although the rest of the vertebra is
240
+ # [1, 2, 3, 4, 5, 6, 7, 8]
241
+ remove_label(join(labelstr, 'verse223.nii.gz'), 23, 0)
242
+
243
+ # spurious annotation of 23 (vol: 1) and 25 (vol: 7) although the rest of the vertebra is
244
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
245
+ remove_label(join(labelstr, 'verse226.nii.gz'), 23, 0)
246
+ remove_label(join(labelstr, 'verse226.nii.gz'), 25, 0)
247
+
248
+ # spurious annotation of 25 (vol: 27) although the rest of the vertebra is
249
+ # [1, 2, 3, 4, 5, 6, 7, 8]
250
+ remove_label(join(labelstr, 'verse227.nii.gz'), 25, 0)
251
+
252
+ # spurious annotation of 20 (vol: 24) although the rest of the vertebra is
253
+ # [1, 2, 3, 4, 5, 6, 7, 8]
254
+ remove_label(join(labelstr, 'verse232.nii.gz'), 20, 0)
255
+
256
+
257
+ # Now we are ready to run nnU-Net
258
+
259
+
260
+ """# run this part of the code once training is done
261
+ folder_gt = "/media/fabian/My Book/MedicalDecathlon/nnUNet_raw_splitted/Task056_VerSe/labelsTr"
262
+
263
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_fullres/Task056_VerSe/nnUNetTrainerV2__nnUNetPlansv2.1/cv_niftis_raw"
264
+ out_json = "/home/fabian/Task056_VerSe_3d_fullres_summary.json"
265
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)
266
+
267
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_lowres/Task056_VerSe/nnUNetTrainerV2__nnUNetPlansv2.1/cv_niftis_raw"
268
+ out_json = "/home/fabian/Task056_VerSe_3d_lowres_summary.json"
269
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)
270
+
271
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_cascade_fullres/Task056_VerSe/nnUNetTrainerV2CascadeFullRes__nnUNetPlansv2.1/cv_niftis_raw"
272
+ out_json = "/home/fabian/Task056_VerSe_3d_cascade_fullres_summary.json"
273
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)"""
274
+
nnunet/dataset_conversion/Task056_Verse_normalize_orientation.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ """
17
+ This code is copied from https://gist.github.com/nlessmann/24d405eaa82abba6676deb6be839266c. All credits go to the
18
+ original author (user nlessmann on GitHub)
19
+ """
20
+
21
+ import numpy as np
22
+ import SimpleITK as sitk
23
+
24
+
25
+ def reverse_axes(image):
26
+ return np.transpose(image, tuple(reversed(range(image.ndim))))
27
+
28
+
29
+ def read_image(imagefile):
30
+ image = sitk.ReadImage(imagefile)
31
+ data = reverse_axes(sitk.GetArrayFromImage(image)) # switch from zyx to xyz
32
+ header = {
33
+ 'spacing': image.GetSpacing(),
34
+ 'origin': image.GetOrigin(),
35
+ 'direction': image.GetDirection()
36
+ }
37
+ return data, header
38
+
39
+
40
+ def save_image(img: np.ndarray, header: dict, output_file: str):
41
+ """
42
+ CAREFUL you need to restore_original_slice_orientation before saving!
43
+ :param img:
44
+ :param header:
45
+ :return:
46
+ """
47
+ # reverse back
48
+ img = reverse_axes(img) # switch from zyx to xyz
49
+ img_itk = sitk.GetImageFromArray(img)
50
+ img_itk.SetSpacing(header['spacing'])
51
+ img_itk.SetOrigin(header['origin'])
52
+ if not isinstance(header['direction'], tuple):
53
+ img_itk.SetDirection(header['direction'].flatten())
54
+ else:
55
+ img_itk.SetDirection(header['direction'])
56
+
57
+ sitk.WriteImage(img_itk, output_file)
58
+
59
+
60
+ def swap_flip_dimensions(cosine_matrix, image, header=None):
61
+ # Compute swaps and flips
62
+ swap = np.argmax(abs(cosine_matrix), axis=0)
63
+ flip = np.sum(cosine_matrix, axis=0)
64
+
65
+ # Apply transformation to image volume
66
+ image = np.transpose(image, tuple(swap))
67
+ image = image[tuple(slice(None, None, int(f)) for f in flip)]
68
+
69
+ if header is None:
70
+ return image
71
+
72
+ # Apply transformation to header
73
+ header['spacing'] = tuple(header['spacing'][s] for s in swap)
74
+ header['direction'] = np.eye(3)
75
+
76
+ return image, header
77
+
78
+
79
+ def normalize_slice_orientation(image, header):
80
+ # Preserve original header so that we can easily transform back
81
+ header['original'] = header.copy()
82
+
83
+ # Compute inverse of cosine (round first because we assume 0/1 values only)
84
+ # to determine how the image has to be transposed and flipped for cosine = identity
85
+ cosine = np.asarray(header['direction']).reshape(3, 3)
86
+ cosine_inv = np.linalg.inv(np.round(cosine))
87
+
88
+ return swap_flip_dimensions(cosine_inv, image, header)
89
+
90
+
91
+ def restore_original_slice_orientation(mask, header):
92
+ # Use original orientation for transformation because we assume the image to be in
93
+ # normalized orientation, i.e., identity cosine)
94
+ cosine = np.asarray(header['original']['direction']).reshape(3, 3)
95
+ cosine_rnd = np.round(cosine)
96
+
97
+ # Apply transformations to both the image and the mask
98
+ return swap_flip_dimensions(cosine_rnd, mask), header['original']
nnunet/dataset_conversion/Task058_ISBI_EM_SEG.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+
18
+ import SimpleITK as sitk
19
+ import numpy as np
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+ from nnunet.paths import nnUNet_raw_data
22
+ from skimage import io
23
+
24
+
25
+ def export_for_submission(predicted_npz, out_file):
26
+ """
27
+ they expect us to submit a 32 bit 3d tif image with values between 0 (100% membrane certainty) and 1
28
+ (100% non-membrane certainty). We use the softmax output for that
29
+ :return:
30
+ """
31
+ a = np.load(predicted_npz)['softmax']
32
+ a = a / a.sum(0)[None]
33
+ # channel 0 is non-membrane prob
34
+ nonmembr_prob = a[0]
35
+ assert out_file.endswith(".tif")
36
+ io.imsave(out_file, nonmembr_prob.astype(np.float32))
37
+
38
+
39
+
40
+ if __name__ == "__main__":
41
+ # download from here http://brainiac2.mit.edu/isbi_challenge/downloads
42
+
43
+ base = "/media/fabian/My Book/datasets/ISBI_EM_SEG"
44
+ # the orientation of VerSe is all fing over the place. run fslreorient2std to correct that (hopefully!)
45
+ # THIS CAN HAVE CONSEQUENCES FOR THE TEST SET SUBMISSION! CAREFUL!
46
+ train_volume = io.imread(join(base, "train-volume.tif"))
47
+ train_labels = io.imread(join(base, "train-labels.tif"))
48
+ train_labels[train_labels == 255] = 1
49
+ test_volume = io.imread(join(base, "test-volume.tif"))
50
+
51
+ task_id = 58
52
+ task_name = "ISBI_EM_SEG"
53
+
54
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
55
+
56
+ out_base = join(nnUNet_raw_data, foldername)
57
+ imagestr = join(out_base, "imagesTr")
58
+ imagests = join(out_base, "imagesTs")
59
+ labelstr = join(out_base, "labelsTr")
60
+ maybe_mkdir_p(imagestr)
61
+ maybe_mkdir_p(imagests)
62
+ maybe_mkdir_p(labelstr)
63
+
64
+ img_tr_itk = sitk.GetImageFromArray(train_volume.astype(np.float32))
65
+ lab_tr_itk = sitk.GetImageFromArray(1 - train_labels) # walls are foreground, cells background
66
+ img_te_itk = sitk.GetImageFromArray(test_volume.astype(np.float32))
67
+
68
+ img_tr_itk.SetSpacing((4, 4, 50))
69
+ lab_tr_itk.SetSpacing((4, 4, 50))
70
+ img_te_itk.SetSpacing((4, 4, 50))
71
+
72
+ # 5 copies, otherwise we cannot run nnunet (5 fold cv needs that)
73
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training0_0000.nii.gz"))
74
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training1_0000.nii.gz"))
75
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training2_0000.nii.gz"))
76
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training3_0000.nii.gz"))
77
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training4_0000.nii.gz"))
78
+
79
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training0.nii.gz"))
80
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training1.nii.gz"))
81
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training2.nii.gz"))
82
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training3.nii.gz"))
83
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training4.nii.gz"))
84
+
85
+ sitk.WriteImage(img_te_itk, join(imagests, "testing.nii.gz"))
86
+
87
+ json_dict = OrderedDict()
88
+ json_dict['name'] = task_name
89
+ json_dict['description'] = task_name
90
+ json_dict['tensorImageSize'] = "4D"
91
+ json_dict['reference'] = "see challenge website"
92
+ json_dict['licence'] = "see challenge website"
93
+ json_dict['release'] = "0.0"
94
+ json_dict['modality'] = {
95
+ "0": "EM",
96
+ }
97
+ json_dict['labels'] = {i: str(i) for i in range(2)}
98
+
99
+ json_dict['numTraining'] = 5
100
+ json_dict['numTest'] = 1
101
+ json_dict['training'] = [{'image': "./imagesTr/training%d.nii.gz" % i, "label": "./labelsTr/training%d.nii.gz" % i} for i in
102
+ range(5)]
103
+ json_dict['test'] = ["./imagesTs/testing.nii.gz"]
104
+
105
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task059_EPFL_EM_MITO_SEG.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy as np
17
+ import subprocess
18
+ from collections import OrderedDict
19
+ from nnunet.paths import nnUNet_raw_data
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+ import shutil
22
+ from skimage import io
23
+ import SimpleITK as sitk
24
+ import shutil
25
+
26
+
27
if __name__ == "__main__":
    """
    Converts the EPFL mitochondria EM segmentation dataset to nnU-Net format.
    Download from https://www.epfl.ch/labs/cvlab/data/data-em/
    """
    base = "/media/fabian/My Book/datasets/EPFL_MITO_SEG"

    # Load tiff stacks; ground truth uses 255 for foreground -> remap to 1.
    train_volume = io.imread(join(base, "training.tif"))
    train_labels = io.imread(join(base, "training_groundtruth.tif"))
    train_labels[train_labels == 255] = 1
    test_volume = io.imread(join(base, "testing.tif"))
    test_labels = io.imread(join(base, "testing_groundtruth.tif"))
    test_labels[test_labels == 255] = 1

    task_id = 59
    task_name = "EPFL_EM_MITO_SEG"
    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    labelste = join(out_base, "labelsTs")
    for folder in (imagestr, imagests, labelstr, labelste):
        maybe_mkdir_p(folder)

    img_tr_itk = sitk.GetImageFromArray(train_volume.astype(np.float32))
    lab_tr_itk = sitk.GetImageFromArray(train_labels.astype(np.uint8))
    img_te_itk = sitk.GetImageFromArray(test_volume.astype(np.float32))
    lab_te_itk = sitk.GetImageFromArray(test_labels.astype(np.uint8))

    # isotropic 5nm voxels (assumed from dataset description -- confirm)
    for itk_image in (img_tr_itk, lab_tr_itk, img_te_itk, lab_te_itk):
        itk_image.SetSpacing((5, 5, 5))

    # nnU-Net's 5-fold cross-validation needs at least 5 training cases, so the
    # single training stack is replicated five times.
    sitk.WriteImage(img_tr_itk, join(imagestr, "training0_0000.nii.gz"))
    for i in range(1, 5):
        shutil.copy(join(imagestr, "training0_0000.nii.gz"),
                    join(imagestr, "training%d_0000.nii.gz" % i))

    sitk.WriteImage(lab_tr_itk, join(labelstr, "training0.nii.gz"))
    for i in range(1, 5):
        shutil.copy(join(labelstr, "training0.nii.gz"),
                    join(labelstr, "training%d.nii.gz" % i))

    sitk.WriteImage(img_te_itk, join(imagests, "testing.nii.gz"))
    sitk.WriteImage(lab_te_itk, join(labelste, "testing.nii.gz"))

    json_dict = OrderedDict()
    json_dict['name'] = task_name
    json_dict['description'] = task_name
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see challenge website"
    json_dict['licence'] = "see challenge website"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "EM",
    }
    json_dict['labels'] = {i: str(i) for i in range(2)}
    json_dict['numTraining'] = 5
    json_dict['numTest'] = 1
    json_dict['training'] = [{'image': "./imagesTr/training%d.nii.gz" % i,
                              "label": "./labelsTr/training%d.nii.gz" % i}
                             for i in range(5)]
    json_dict['test'] = ["./imagesTs/testing.nii.gz"]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task061_CREMI.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import numpy as np
20
+ from nnunet.paths import nnUNet_raw_data, preprocessing_output_dir
21
+ import shutil
22
+ import SimpleITK as sitk
23
+
24
+ try:
25
+ import h5py
26
+ except ImportError:
27
+ h5py = None
28
+
29
+
30
def load_sample(filename):
    """Load one CREMI hdf5 sample.

    Parameters
    ----------
    filename : str
        Path to a CREMI ``.hdf`` file containing ``volumes/raw`` and,
        for training samples, ``volumes/labels/clefts``.

    Returns
    -------
    (np.ndarray, np.ndarray or None)
        The raw EM volume and a binary cleft segmentation (1 = cleft),
        or ``None`` for test samples that ship without labels.
    """
    # use a context manager so the hdf5 handle is closed (the original
    # implementation leaked the open file)
    with h5py.File(filename, 'r') as f:
        data = np.array(f['volumes']['raw'])

        if 'labels' in f['volumes'].keys():
            labels = np.array(f['volumes']['labels']['clefts'])
            # clefts are low values, background is high
            labels = (labels < 100000).astype(np.uint8)
        else:
            labels = None
    return data, labels
42
+
43
+
44
def save_as_nifti(arr, filename, spacing):
    """Write a numpy array to *filename* as NIfTI with the given voxel spacing.

    ``spacing`` is passed straight to SimpleITK, i.e. in (x, y, z) order.
    """
    image = sitk.GetImageFromArray(arr)
    image.SetSpacing(spacing)
    sitk.WriteImage(image, filename)
48
+
49
+
50
def prepare_submission():
    """Package nnU-Net predictions for the three CREMI test samples (A+/B+/C+)
    into the challenge's hdf submission format.

    Reads the predicted niftis from ``results_3d_fullres`` and writes one
    ``sample_X+_20160601.hdf`` per sample. Requires the ``cremi`` package.
    """
    from cremi.io import CremiFile
    from cremi.Volume import Volume

    base = "/home/fabian/drives/datasets/results/nnUNet/test_sets/Task061_CREMI/"

    # the original code repeated this stanza three times verbatim; one loop
    # over (lowercase prediction name, uppercase submission name) suffices
    for lower, upper in (('a', 'A'), ('b', 'B'), ('c', 'C')):
        pred = sitk.GetArrayFromImage(sitk.ReadImage(
            join(base, 'results_3d_fullres', "sample_%s+.nii.gz" % lower))).astype(np.uint64)
        # CREMI encodes background as the maximum uint64 value
        pred[pred == 0] = 0xffffffffffffffff
        out_file = CremiFile(join(base, 'sample_%s+_20160601.hdf' % upper), 'w')
        clefts = Volume(pred, (40., 4., 4.))
        out_file.write_clefts(clefts)
        out_file.close()
76
+
77
+
78
if __name__ == "__main__":
    """Convert the CREMI synaptic cleft dataset to nnU-Net format and write
    manual cross-validation splits."""
    assert h5py is not None, "you need h5py for this. Install with 'pip install h5py'"

    foldername = "Task061_CREMI"
    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    for folder in (imagestr, imagests, labelstr):
        maybe_mkdir_p(folder)

    base = "/media/fabian/My Book/datasets/CREMI"

    # training samples A, B, C (voxel spacing 4x4x40 nm, x-y-z order for sitk)
    img, label = load_sample(join(base, "sample_A_20160501.hdf"))
    save_as_nifti(img, join(imagestr, "sample_a_0000.nii.gz"), (4, 4, 40))
    save_as_nifti(label, join(labelstr, "sample_a.nii.gz"), (4, 4, 40))
    img, label = load_sample(join(base, "sample_B_20160501.hdf"))
    save_as_nifti(img, join(imagestr, "sample_b_0000.nii.gz"), (4, 4, 40))
    save_as_nifti(label, join(labelstr, "sample_b.nii.gz"), (4, 4, 40))
    img, label = load_sample(join(base, "sample_C_20160501.hdf"))
    save_as_nifti(img, join(imagestr, "sample_c_0000.nii.gz"), (4, 4, 40))
    save_as_nifti(label, join(labelstr, "sample_c.nii.gz"), (4, 4, 40))

    # NOTE(review): sample_d and sample_e are written from the still-bound
    # sample C arrays -- presumably deliberate duplication to reach the 5
    # training cases nnU-Net's 5-fold CV requires; confirm this is intended.
    save_as_nifti(img, join(imagestr, "sample_d_0000.nii.gz"), (4, 4, 40))
    save_as_nifti(label, join(labelstr, "sample_d.nii.gz"), (4, 4, 40))

    save_as_nifti(img, join(imagestr, "sample_e_0000.nii.gz"), (4, 4, 40))
    save_as_nifti(label, join(labelstr, "sample_e.nii.gz"), (4, 4, 40))

    # test samples (no labels shipped)
    for upper in ('A', 'B', 'C'):
        img, label = load_sample(join(base, "sample_%s+_20160601.hdf" % upper))
        save_as_nifti(img, join(imagests, "sample_%s+_0000.nii.gz" % upper.lower()), (4, 4, 40))

    json_dict = OrderedDict()
    json_dict['name'] = foldername
    json_dict['description'] = foldername
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see challenge website"
    json_dict['licence'] = "see challenge website"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "EM",
    }
    json_dict['labels'] = {i: str(i) for i in range(2)}
    json_dict['numTraining'] = 5
    json_dict['numTest'] = 1
    json_dict['training'] = [{'image': "./imagesTr/sample_%s.nii.gz" % i,
                              "label": "./labelsTr/sample_%s.nii.gz" % i}
                             for i in ['a', 'b', 'c', 'd', 'e']]
    json_dict['test'] = ["./imagesTs/sample_a+.nii.gz", "./imagesTs/sample_b+.nii.gz", "./imagesTs/sample_c+.nii.gz"]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))

    out_preprocessed = join(preprocessing_output_dir, foldername)
    maybe_mkdir_p(out_preprocessed)
    # manual splits: every fold trains AND validates on all three real samples
    splits = [{'train': ["sample_a", "sample_b", "sample_c"],
               'val': ["sample_a", "sample_b", "sample_c"]}
              for _ in range(5)]
    save_pickle(splits, join(out_preprocessed, "splits_final.pkl"))
nnunet/dataset_conversion/Task062_NIHPancreas.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ from multiprocessing import Pool
21
+ import nibabel
22
+
23
+
24
def reorient(filename):
    """Rewrite the NIfTI at *filename* in-place in closest-canonical (RAS) orientation."""
    canonical = nibabel.as_closest_canonical(nibabel.load(filename))
    nibabel.save(canonical, filename)
28
+
29
+
30
if __name__ == "__main__":
    """Convert the NIH Pancreas-CT dataset to nnU-Net format.

    All volumes and labels are first reoriented in-place to canonical
    orientation (in parallel), then copied into the nnU-Net raw data layout.
    """
    base = "/media/fabian/DeepLearningData/Pancreas-CT"

    # reorient images and labels in parallel.
    # Fix over the original: the first batch of async results was .get() twice
    # (the results list was never cleared between the two loops) and the Pool
    # was never closed/joined.
    p = Pool(8)
    results = []
    for f in subfiles(join(base, "data"), suffix=".nii.gz"):
        results.append(p.map_async(reorient, (f, )))
    for f in subfiles(join(base, "TCIA_pancreas_labels-02-05-2017"), suffix=".nii.gz"):
        results.append(p.map_async(reorient, (f, )))
    _ = [i.get() for i in results]
    p.close()
    p.join()

    task_id = 62
    task_name = "NIHPancreas"

    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    maybe_mkdir_p(imagestr)
    maybe_mkdir_p(imagests)
    maybe_mkdir_p(labelstr)

    train_patient_names = []
    test_patient_names = []
    cases = list(range(1, 83))  # PANCREAS_0001 .. PANCREAS_0082, all training
    folder_data = join(base, "data")
    folder_labels = join(base, "TCIA_pancreas_labels-02-05-2017")
    for c in cases:
        casename = "pancreas_%04.0d" % c
        shutil.copy(join(folder_data, "PANCREAS_%04.0d.nii.gz" % c), join(imagestr, casename + "_0000.nii.gz"))
        shutil.copy(join(folder_labels, "label%04.0d.nii.gz" % c), join(labelstr, casename + ".nii.gz"))
        train_patient_names.append(casename)

    json_dict = OrderedDict()
    json_dict['name'] = task_name
    json_dict['description'] = task_name
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see website"
    json_dict['licence'] = "see website"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "CT",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "Pancreas",
    }
    json_dict['numTraining'] = len(train_patient_names)
    json_dict['numTest'] = len(test_patient_names)
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1],
                              "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]}
                             for i in train_patient_names]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task064_KiTS_labelsFixed.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import shutil
17
+ from batchgenerators.utilities.file_and_folder_operations import *
18
+ from nnunet.paths import nnUNet_raw_data
19
+
20
+
21
if __name__ == "__main__":
    # KiTS dataset after Nick fixed all the labels that had errors.
    # Downloaded on Jan 6th 2020.
    base = "/media/fabian/My Book/datasets/KiTS_clean/kits19/data"

    task_id = 64
    task_name = "KiTS_labelsFixed"
    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    for folder in (imagestr, imagests, labelstr):
        maybe_mkdir_p(folder)

    all_cases = subfolders(base, join=False)
    # first 210 cases are the official KiTS training set, the rest is test
    train_patients = all_cases[:210]
    test_patients = all_cases[210:]

    train_patient_names = []
    test_patient_names = []

    for p in train_patients:
        case_dir = join(base, p)
        shutil.copy(join(case_dir, "imaging.nii.gz"), join(imagestr, p + "_0000.nii.gz"))
        shutil.copy(join(case_dir, "segmentation.nii.gz"), join(labelstr, p + ".nii.gz"))
        train_patient_names.append(p)

    for p in test_patients:
        shutil.copy(join(base, p, "imaging.nii.gz"), join(imagests, p + "_0000.nii.gz"))
        test_patient_names.append(p)

    json_dict = {}
    json_dict['name'] = "KiTS"
    json_dict['description'] = "kidney and kidney tumor segmentation"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "KiTS data for nnunet"
    json_dict['licence'] = ""
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "CT",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "Kidney",
        "2": "Tumor"
    }
    json_dict['numTraining'] = len(train_patient_names)
    json_dict['numTest'] = len(test_patient_names)
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1],
                              "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]}
                             for i in train_patient_names]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task065_KiTS_NicksLabels.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import shutil
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ from nnunet.paths import nnUNet_raw_data
20
+
21
if __name__ == "__main__":
    # Rerun of KiTS training with Nick's alternative labels (the kidney region
    # is defined differently). These labels operate in interpolated spacing.
    base = "/media/fabian/My Book/datasets/KiTS_NicksLabels/kits19/data"
    labelsdir = "/media/fabian/My Book/datasets/KiTS_NicksLabels/filled_labels"

    task_id = 65
    task_name = "KiTS_NicksLabels"
    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    for folder in (imagestr, imagests, labelstr):
        maybe_mkdir_p(folder)

    all_cases = subfolders(base, join=False)
    # first 210 cases are the official KiTS training set, the rest is test
    train_patients = all_cases[:210]
    test_patients = all_cases[210:]

    train_patient_names = []
    test_patient_names = []

    for p in train_patients:
        # images come from the kits19 repo, labels from Nick's filled_labels dir
        shutil.copy(join(base, p, "imaging.nii.gz"), join(imagestr, p + "_0000.nii.gz"))
        shutil.copy(join(labelsdir, p + ".nii.gz"), join(labelstr, p + ".nii.gz"))
        train_patient_names.append(p)

    for p in test_patients:
        shutil.copy(join(base, p, "imaging.nii.gz"), join(imagests, p + "_0000.nii.gz"))
        test_patient_names.append(p)

    json_dict = {}
    json_dict['name'] = "KiTS"
    json_dict['description'] = "kidney and kidney tumor segmentation"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "KiTS data for nnunet"
    json_dict['licence'] = ""
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "CT",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "Kidney",
        "2": "Tumor"
    }
    json_dict['numTraining'] = len(train_patient_names)
    json_dict['numTest'] = len(test_patient_names)
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1],
                              "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]}
                             for i in train_patient_names]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task069_CovidSeg.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+
3
+ from batchgenerators.utilities.file_and_folder_operations import *
4
+ import SimpleITK as sitk
5
+ from nnunet.paths import nnUNet_raw_data
6
+
7
if __name__ == '__main__':
    # data is available at http://medicalsegmentation.com/covid19/
    download_dir = '/home/fabian/Downloads'

    task_id = 69
    task_name = "CovidSeg"
    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    for folder in (imagestr, imagests, labelstr):
        maybe_mkdir_p(folder)

    # The niftis are 3d, but they are just stacks of 2d slices from different
    # patients -> do not train a 3d U-Net on this.
    training_data = sitk.GetArrayFromImage(sitk.ReadImage(join(download_dir, 'tr_im.nii.gz')))
    training_labels = sitk.GetArrayFromImage(sitk.ReadImage(join(download_dir, 'tr_mask.nii.gz')))

    train_patient_names = []
    test_patient_names = []

    # split the 100-slice training stack into 5 interleaved parts of 20 slices
    # each so 5-fold cross-validation is possible
    for f in range(5):
        this_name = 'part_%d' % f
        sitk.WriteImage(sitk.GetImageFromArray(training_data[f::5]),
                        join(imagestr, this_name + '_0000.nii.gz'))
        sitk.WriteImage(sitk.GetImageFromArray(training_labels[f::5]),
                        join(labelstr, this_name + '.nii.gz'))
        train_patient_names.append(this_name)

    shutil.copy(join(download_dir, 'val_im.nii.gz'), join(imagests, 'val_im.nii.gz'))
    test_patient_names.append('val_im')

    json_dict = {}
    json_dict['name'] = task_name
    json_dict['description'] = ""
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = ""
    json_dict['licence'] = ""
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "nonct",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "stuff1",
        "2": "stuff2",
        "3": "stuff3",
    }
    json_dict['numTraining'] = len(train_patient_names)
    json_dict['numTest'] = len(test_patient_names)
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1],
                              "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]}
                             for i in train_patient_names]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task075_Fluo_C3DH_A549_ManAndSim.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from multiprocessing import Pool
16
+ import SimpleITK as sitk
17
+ import numpy as np
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ from nnunet.paths import nnUNet_raw_data
20
+ from nnunet.paths import preprocessing_output_dir
21
+ from skimage.io import imread
22
+
23
+
24
def load_tiff_convert_to_nifti(img_file, lab_file, img_out_base, anno_out, spacing):
    """Convert one tiff image (and optionally its label tiff) to NIfTI.

    Parameters
    ----------
    img_file : str
        Input tiff volume.
    lab_file : str or None
        Matching label tiff; pass None for unlabeled (test) cases.
    img_out_base : str
        Output path prefix; "_0000.nii.gz" is appended for the image.
    anno_out : str or None
        Output path for the binarized label (ignored if lab_file is None).
    spacing : sequence
        Voxel spacing in array (z, y, x) order; reversed for sitk.
    """
    volume = imread(img_file)
    volume_itk = sitk.GetImageFromArray(volume.astype(np.float32))
    volume_itk.SetSpacing(np.array(spacing)[::-1])
    sitk.WriteImage(volume_itk, img_out_base + "_0000.nii.gz")

    if lab_file is not None:
        seg = imread(lab_file)
        seg[seg > 0] = 1  # collapse all instance ids into a single foreground class
        seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
        seg_itk.SetSpacing(np.array(spacing)[::-1])
        sitk.WriteImage(seg_itk, anno_out)
36
+
37
+
38
def prepare_task(base, task_id, task_name, spacing):
    """Convert a CellTrackingChallenge dataset (``<base>_train`` / ``<base>_test``
    folders of tiff sequences) into nnU-Net raw data format.

    Only training frames that have a manual segmentation
    (``<seq>_GT/SEG/man_seg<frame>.tif``) are converted. Conversion runs on a
    pool of 16 workers; a dataset.json is written at the end.
    """
    pool = Pool(16)

    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    for folder in (imagestr, imagests, labelstr):
        maybe_mkdir_p(folder)

    train_patient_names = []
    test_patient_names = []
    async_results = []

    train_base = base + '_train'
    for train_sequence in [s for s in subfolders(train_base, join=False) if not s.endswith("_GT")]:
        for t in subfiles(join(train_base, train_sequence), suffix=".tif", join=False):
            casename = train_sequence + "_" + t[:-4]
            img_file = join(train_base, train_sequence, t)
            # annotation lives next to the sequence folder; skip unannotated frames
            lab_file = join(train_base, train_sequence + "_GT", "SEG", "man_seg" + t[1:])
            if not isfile(lab_file):
                continue
            img_out_base = join(imagestr, casename)
            anno_out = join(labelstr, casename + ".nii.gz")
            async_results.append(pool.starmap_async(
                load_tiff_convert_to_nifti,
                ((img_file, lab_file, img_out_base, anno_out, spacing),)))
            train_patient_names.append(casename)

    test_base = base + '_test'
    for test_sequence in [s for s in subfolders(test_base, join=False) if not s.endswith("_GT")]:
        for t in subfiles(join(test_base, test_sequence), suffix=".tif", join=False):
            casename = test_sequence + "_" + t[:-4]
            img_file = join(test_base, test_sequence, t)
            img_out_base = join(imagests, casename)
            async_results.append(pool.starmap_async(
                load_tiff_convert_to_nifti,
                ((img_file, None, img_out_base, None, spacing),)))
            test_patient_names.append(casename)

    _ = [r.get() for r in async_results]

    json_dict = {}
    json_dict['name'] = task_name
    json_dict['description'] = ""
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = ""
    json_dict['licence'] = ""
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "BF",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "cell",
    }
    json_dict['numTraining'] = len(train_patient_names)
    json_dict['numTest'] = len(test_patient_names)
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i,
                              "label": "./labelsTr/%s.nii.gz" % i}
                             for i in train_patient_names]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_patient_names]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))
    pool.close()
    pool.join()
107
+
108
+
109
if __name__ == "__main__":
    """Convert Fluo-C3DH-A549 (Man+Sim) and write 4 manual CV splits that mix
    the real (01_/02_) and simulated (01_SIM/02_SIM) sequences."""
    base = "/media/fabian/My Book/datasets/CellTrackingChallenge/Fluo-C3DH-A549_ManAndSim"
    task_id = 75
    task_name = 'Fluo_C3DH_A549_ManAndSim'
    spacing = (1, 0.126, 0.126)
    prepare_task(base, task_id, task_name, spacing)

    task_name = "Task075_Fluo_C3DH_A549_ManAndSim"
    labelsTr = join(nnUNet_raw_data, task_name, "labelsTr")
    cases = subfiles(labelsTr, suffix='.nii.gz', join=False)

    # note: startswith('01_') also matches '01_SIM...' -- folds 1/2 therefore
    # include the SIM sequences on the train side by construction
    splits = [
        {'train': [i[:-7] for i in cases if i.startswith('01_') or i.startswith('02_SIM')],
         'val': [i[:-7] for i in cases if i.startswith('02_') and not i.startswith('02_SIM')]},
        {'train': [i[:-7] for i in cases if i.startswith('02_') or i.startswith('01_SIM')],
         'val': [i[:-7] for i in cases if i.startswith('01_') and not i.startswith('01_SIM')]},
        {'train': [i[:-7] for i in cases if i.startswith('01_') or i.startswith('02_') and not i.startswith('02_SIM')],
         'val': [i[:-7] for i in cases if i.startswith('02_SIM')]},
        {'train': [i[:-7] for i in cases if i.startswith('02_') or i.startswith('01_') and not i.startswith('01_SIM')],
         'val': [i[:-7] for i in cases if i.startswith('01_SIM')]},
    ]
    # fix over the original: the preprocessed task folder may not exist yet,
    # which made save_pickle fail -- create it first (same as Task061 does)
    maybe_mkdir_p(join(preprocessing_output_dir, task_name))
    save_pickle(splits, join(preprocessing_output_dir, task_name, "splits_final.pkl"))
137
+
nnunet/dataset_conversion/Task076_Fluo_N3DH_SIM.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from multiprocessing import Pool
17
+ from multiprocessing.dummy import Pool
18
+
19
+ import SimpleITK as sitk
20
+ import numpy as np
21
+ from batchgenerators.utilities.file_and_folder_operations import *
22
+ from skimage.io import imread
23
+ from skimage.io import imsave
24
+ from skimage.morphology import ball
25
+ from skimage.morphology import erosion
26
+ from skimage.transform import resize
27
+
28
+ from nnunet.paths import nnUNet_raw_data
29
+ from nnunet.paths import preprocessing_output_dir
30
+
31
+
32
def load_bmp_convert_to_nifti_borders(img_file, lab_file, img_out_base, anno_out, spacing, border_thickness=0.7):
    """
    Convert one raw tif image (and optionally its instance segmentation) to nifti.

    The image is written as <img_out_base>_0000.nii.gz. If lab_file is given, the
    instance labels are collapsed to a 3-class problem (0 background, 1 cell,
    2 cell border) where the border is a rim of roughly `border_thickness` mm
    obtained per instance via erosion; the result is written to anno_out.

    :param spacing: voxel spacing; sitk expects (x, y, z), hence the [::-1] below
    :param border_thickness: border width in mm
    """
    img = imread(img_file)
    img_itk = sitk.GetImageFromArray(img.astype(np.float32))
    img_itk.SetSpacing(np.array(spacing)[::-1])
    sitk.WriteImage(img_itk, join(img_out_base + "_0000.nii.gz"))

    if lab_file is not None:
        l = imread(lab_file)
        # borders must be computed BEFORE binarizing, otherwise touching
        # instances could not be separated
        borders = generate_border_as_suggested_by_twollmann(l, spacing, border_thickness)
        l[l > 0] = 1
        l[borders == 1] = 2
        l_itk = sitk.GetImageFromArray(l.astype(np.uint8))
        l_itk.SetSpacing(np.array(spacing)[::-1])
        sitk.WriteImage(l_itk, anno_out)
46
+
47
+
48
def generate_ball(spacing, radius, dtype=int):
    """
    Build a binary ellipsoid structuring element whose physical radius is
    `radius` (same unit as `spacing`, typically mm) along every axis, for
    anisotropic voxel spacing: a large isotropic ball is generated and then
    resampled to the anisotropic target shape and re-binarized at 0.5.
    """
    radius_in_voxels = np.round(radius / np.array(spacing)).astype(int)
    # target shape: 2r+1 voxels per axis so the element has a center voxel
    n = 2 * radius_in_voxels + 1
    # oversized isotropic ball, resampled down for better accuracy
    ball_iso = ball(max(n) * 2, dtype=np.float64)
    ball_resampled = resize(ball_iso, n, 1, 'constant', 0, clip=True, anti_aliasing=False, preserve_range=True)
    ball_resampled[ball_resampled > 0.5] = 1
    ball_resampled[ball_resampled <= 0.5] = 0
    return ball_resampled.astype(dtype)
56
+
57
+
58
def generate_border_as_suggested_by_twollmann(label_img: np.ndarray, spacing, border_thickness: float = 2) -> np.ndarray:
    """
    Binary border map for an instance segmentation: for each instance, the voxels
    that disappear when the instance is eroded with a `border_thickness`-sized
    ball are marked as border (scheme suggested by Twollmann et al.).

    :return: array of the same shape as label_img, 1 on borders, 0 elsewhere
    """
    border = np.zeros_like(label_img)
    selem = generate_ball(spacing, border_thickness)
    for l in np.unique(label_img):
        if l == 0: continue  # background has no border
        mask = (label_img == l).astype(int)
        eroded = erosion(mask, selem)
        # border = voxels of the instance that the erosion removed
        border[(eroded == 0) & (mask != 0)] = 1
    return border
67
+
68
+
69
def find_differences(labelstr1, labelstr2):
    """Print, for every case, how many voxels differ between the same-named
    nifti files in two folders (quick sanity check)."""
    for case in subfiles(labelstr1, suffix='.nii.gz', join=False):
        first = sitk.GetArrayFromImage(sitk.ReadImage(join(labelstr1, case)))
        second = sitk.GetArrayFromImage(sitk.ReadImage(join(labelstr2, case)))
        num_mismatched = np.sum(first != second)
        print(case, num_mismatched)
74
+
75
+
76
def prepare_task(base, task_id, task_name, spacing, border_thickness: float = 15, processes: int = 16):
    """
    Convert a cell tracking challenge dataset (tif images + man_seg* ground
    truth) into nnU-Net's raw data format: nifti images/labels, the expected
    folder layout and a dataset.json. Conversion of the individual cases is
    parallelized over `processes` workers.

    :param base: dataset root; "<base>_train" and "<base>_test" must exist
    :param spacing: voxel spacing passed on to the nifti conversion
    :param border_thickness: border width (mm) for the 3-class border labels
    """
    p = Pool(processes)

    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    maybe_mkdir_p(imagestr)
    maybe_mkdir_p(imagests)
    maybe_mkdir_p(labelstr)

    train_patient_names = []
    test_patient_names = []
    res = []  # async results, collected below so errors surface

    # training cases: sequence folders not ending in _GT; labels live in the
    # corresponding "<sequence>_GT/SEG" folder as man_seg<frame>.tif
    for train_sequence in [i for i in subfolders(base + "_train", join=False) if not i.endswith("_GT")]:
        train_cases = subfiles(join(base + '_train', train_sequence), suffix=".tif", join=False)
        for t in train_cases:
            casename = train_sequence + "_" + t[:-4]
            img_file = join(base + '_train', train_sequence, t)
            lab_file = join(base + '_train', train_sequence + "_GT", "SEG", "man_seg" + t[1:])
            # not every frame is annotated -> skip frames without ground truth
            if not isfile(lab_file):
                continue
            img_out_base = join(imagestr, casename)
            anno_out = join(labelstr, casename + ".nii.gz")
            res.append(
                p.starmap_async(load_bmp_convert_to_nifti_borders, ((img_file, lab_file, img_out_base, anno_out, spacing, border_thickness),)))
            train_patient_names.append(casename)

    # test cases: images only, no annotations
    for test_sequence in [i for i in subfolders(base + "_test", join=False) if not i.endswith("_GT")]:
        test_cases = subfiles(join(base + '_test', test_sequence), suffix=".tif", join=False)
        for t in test_cases:
            casename = test_sequence + "_" + t[:-4]
            img_file = join(base + '_test', test_sequence, t)
            lab_file = None
            img_out_base = join(imagests, casename)
            anno_out = None
            res.append(
                p.starmap_async(load_bmp_convert_to_nifti_borders, ((img_file, lab_file, img_out_base, anno_out, spacing, border_thickness),)))
            test_patient_names.append(casename)

    # wait for all conversions (get() re-raises worker exceptions)
    _ = [i.get() for i in res]

    json_dict = {}
    json_dict['name'] = task_name
    json_dict['description'] = ""
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = ""
    json_dict['licence'] = ""
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "BF",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "cell",
        "2": "border",
    }

    json_dict['numTraining'] = len(train_patient_names)
    json_dict['numTest'] = len(test_patient_names)
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
                             train_patient_names]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_patient_names]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))
    p.close()
    p.join()
146
+
147
+
148
def plot_images(folder, output_folder):
    """Save a png of the central z-slice of every nifti in `folder` for quick
    visual inspection."""
    maybe_mkdir_p(output_folder)
    import matplotlib.pyplot as plt
    for i in subfiles(folder, suffix='.nii.gz', join=False):
        img = sitk.GetArrayFromImage(sitk.ReadImage(join(folder, i)))
        center_slice = img[img.shape[0]//2]
        # i[:-7] strips the '.nii.gz' extension
        plt.imsave(join(output_folder, i[:-7] + '.png'), center_slice)
155
+
156
+
157
def convert_to_tiff(nifti_image: str, output_name: str):
    """Convert a nifti to a 16-bit tiff (submission format of the cell
    tracking challenge)."""
    npy = sitk.GetArrayFromImage(sitk.ReadImage(nifti_image))
    # NOTE(review): the `compress` kwarg was replaced by `compression` in newer
    # tifffile/skimage versions — confirm against the installed version
    imsave(output_name, npy.astype(np.uint16), compress=6)
160
+
161
+
162
def convert_to_instance_seg(arr: np.ndarray, spacing: tuple = (0.2, 0.125, 0.125)):
    """
    Turn a 3-class border-semantic segmentation (0 background, 1 cell core,
    2 cell border) back into an instance segmentation: connected components of
    the cores become the instances, and border voxels are assigned to the
    nearest instance by iterative, spacing-aware dilation.

    :param spacing: voxel spacing used to keep the dilation approximately
        isotropic in physical space
    :return: instance label map, dtype uint32
    """
    from skimage.morphology import label, dilation
    # 1 is core, 2 is border
    objects = label((arr == 1).astype(int))
    final = np.copy(objects)
    remaining_border = arr == 2
    current = np.copy(objects)
    # physical distance (per axis) that has been dilated so far
    dilated_mm = np.array((0, 0, 0))
    spacing = np.array(spacing)

    while np.sum(remaining_border) > 0:
        # choose which axes to dilate this round: always the finest axis; the
        # coarser axes only when they have fallen behind in physical distance
        strel_size = [0, 0, 0]
        maximum_dilation = max(dilated_mm)
        for i in range(3):
            if spacing[i] == min(spacing):
                strel_size[i] = 1
                continue
            if dilated_mm[i] + spacing[i] / 2 < maximum_dilation:
                strel_size[i] = 1
        # slice a 3x3x3 ball down to a plane/line along axes that are skipped
        ball_here = ball(1)

        if strel_size[0] == 0: ball_here = ball_here[1:2]
        if strel_size[1] == 0: ball_here = ball_here[:, 1:2]
        if strel_size[2] == 0: ball_here = ball_here[:, :, 1:2]

        dilated = dilation(current, ball_here)
        # voxels newly reached by the dilation front
        diff = (current == 0) & (dilated != current)
        # only border voxels may adopt the instance id of the front
        final[diff & remaining_border] = dilated[diff & remaining_border]
        remaining_border[diff] = 0
        current = dilated
        dilated_mm = [dilated_mm[i] + spacing[i] if strel_size[i] == 1 else dilated_mm[i] for i in range(3)]
    return final.astype(np.uint32)
195
+
196
+
197
def convert_to_instance_seg2(arr: np.ndarray, spacing: tuple = (0.2, 0.125, 0.125), small_center_threshold=30,
                             isolated_border_as_separate_instance_threshold: int = 15):
    """
    Like convert_to_instance_seg but with two extra heuristics: cores smaller
    than `small_center_threshold` voxels are relabeled as border first (removes
    false-positive instances), and isolated border components without any core
    that are at least `isolated_border_as_separate_instance_threshold` voxels
    large become their own instance instead of being fused into a neighbor.

    :return: instance label map, dtype uint32
    """
    from skimage.morphology import label, dilation
    # we first identify centers that are too small and set them to be border. This should remove false positive instances
    objects = label((arr == 1).astype(int))
    for o in np.unique(objects):
        if o > 0 and np.sum(objects == o) <= small_center_threshold:
            arr[objects == o] = 2

    # 1 is core, 2 is border
    objects = label((arr == 1).astype(int))
    final = np.copy(objects)
    remaining_border = arr == 2
    current = np.copy(objects)
    # physical distance (per axis) dilated so far
    dilated_mm = np.array((0, 0, 0))
    spacing = np.array(spacing)

    while np.sum(remaining_border) > 0:
        # dilate the finest axis every round; coarser axes only when behind
        strel_size = [0, 0, 0]
        maximum_dilation = max(dilated_mm)
        for i in range(3):
            if spacing[i] == min(spacing):
                strel_size[i] = 1
                continue
            if dilated_mm[i] + spacing[i] / 2 < maximum_dilation:
                strel_size[i] = 1
        ball_here = ball(1)

        if strel_size[0] == 0: ball_here = ball_here[1:2]
        if strel_size[1] == 0: ball_here = ball_here[:, 1:2]
        if strel_size[2] == 0: ball_here = ball_here[:, :, 1:2]

        dilated = dilation(current, ball_here)
        diff = (current == 0) & (dilated != current)
        final[diff & remaining_border] = dilated[diff & remaining_border]
        remaining_border[diff] = 0
        current = dilated
        dilated_mm = [dilated_mm[i] + spacing[i] if strel_size[i] == 1 else dilated_mm[i] for i in range(3)]

    # what can happen is that a cell is so small that the network only predicted border and no core. This cell will be
    # fused with the nearest other instance, which we don't want. Therefore we identify isolated border predictions and
    # give them a separate instance id
    # we identify isolated border predictions by checking each foreground object in arr and see whether this object
    # also contains label 1
    max_label = np.max(final)

    foreground_objects = label((arr != 0).astype(int))
    for i in np.unique(foreground_objects):
        if i > 0 and (1 not in np.unique(arr[foreground_objects==i])):
            size_of_object = np.sum(foreground_objects==i)
            if size_of_object >= isolated_border_as_separate_instance_threshold:
                final[foreground_objects == i] = max_label + 1
                max_label += 1

    return final.astype(np.uint32)
254
+
255
+
256
def load_instanceseg_save(in_file: str, out_file: str, better: bool):
    """Read a border-semantic segmentation nifti, convert it to an instance
    segmentation (the improved variant if `better`) and write the result."""
    source_img = sitk.ReadImage(in_file)
    converter = convert_to_instance_seg2 if better else convert_to_instance_seg
    instances = converter(sitk.GetArrayFromImage(source_img))
    result_img = sitk.GetImageFromArray(instances)
    result_img.CopyInformation(source_img)
    sitk.WriteImage(result_img, out_file)
265
+
266
+
267
def convert_all_to_instance(input_folder: str, output_folder: str, processes: int = 24, better: bool = False):
    """Convert every border-semantic nifti in input_folder to an instance
    segmentation in output_folder, parallelized over `processes` workers."""
    maybe_mkdir_p(output_folder)
    filenames = subfiles(input_folder, suffix='.nii.gz', join=False)
    job_args = [(join(input_folder, fname), join(output_folder, fname), better) for fname in filenames]
    pool = Pool(processes)
    # get() re-raises any worker exception
    _ = pool.starmap_async(load_instanceseg_save, job_args).get()
    pool.close()
    pool.join()
278
+
279
+
280
if __name__ == "__main__":
    # Fluo-N3DH-SIM from the cell tracking challenge
    base = "/home/fabian/data/Fluo-N3DH-SIM"
    task_id = 76
    task_name = 'Fluo_N3DH_SIM'
    spacing = (0.2, 0.125, 0.125)
    border_thickness = 0.5

    prepare_task(base, task_id, task_name, spacing, border_thickness, 12)

    # we need custom splits: train on one acquisition sequence, validate on the
    # other (and vice versa) instead of random 5-fold CV
    task_name = "Task076_Fluo_N3DH_SIM"
    labelsTr = join(nnUNet_raw_data, task_name, "labelsTr")
    cases = subfiles(labelsTr, suffix='.nii.gz', join=False)
    splits = []
    splits.append(
        {'train': [i[:-7] for i in cases if i.startswith('01_')],
         'val': [i[:-7] for i in cases if i.startswith('02_')]}
    )
    splits.append(
        {'train': [i[:-7] for i in cases if i.startswith('02_')],
         'val': [i[:-7] for i in cases if i.startswith('01_')]}
    )

    maybe_mkdir_p(join(preprocessing_output_dir, task_name))

    save_pickle(splits, join(preprocessing_output_dir, task_name, "splits_final.pkl"))

    # test set was converted to instance seg with convert_all_to_instance with better=True

    # convert to tiff with convert_to_tiff
310
+
311
+
312
+
nnunet/dataset_conversion/Task082_BraTS_2020.py ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import shutil
15
+ from collections import OrderedDict
16
+ from copy import deepcopy
17
+ from multiprocessing.pool import Pool
18
+ from typing import Tuple
19
+
20
+ import SimpleITK as sitk
21
+ import numpy as np
22
+ import scipy.stats as ss
23
+ from batchgenerators.utilities.file_and_folder_operations import *
24
+ from medpy.metric import dc, hd95
25
+ from nnunet.dataset_conversion.Task032_BraTS_2018 import convert_labels_back_to_BraTS_2018_2019_convention
26
+ from nnunet.dataset_conversion.Task043_BraTS_2019 import copy_BraTS_segmentation_and_convert_labels
27
+ from nnunet.evaluation.region_based_evaluation import get_brats_regions, evaluate_regions
28
+ from nnunet.paths import nnUNet_raw_data
29
+ from nnunet.postprocessing.consolidate_postprocessing import collect_cv_niftis
30
+
31
+
32
def apply_brats_threshold(fname, out_dir, threshold, replace_with):
    """
    BraTS-style postprocessing for one predicted segmentation: if the enhancing
    tumor (label 3) covers fewer than `threshold` voxels, relabel it as
    `replace_with` — tiny enhancing predictions are typically false positives
    that the BraTS metric penalizes heavily.

    :param fname: path to the predicted segmentation (.nii.gz)
    :param out_dir: folder the (possibly modified) segmentation is written to
    :param threshold: minimum number of label-3 voxels required to keep label 3
    :param replace_with: label substituted for 3 when below the threshold
    """
    img_itk = sitk.ReadImage(fname)
    img_npy = sitk.GetArrayFromImage(img_itk)
    s = np.sum(img_npy == 3)
    if s < threshold:
        img_npy[img_npy == 3] = replace_with
    img_itk_postprocessed = sitk.GetImageFromArray(img_npy)
    img_itk_postprocessed.CopyInformation(img_itk)
    # os.path.basename instead of fname.split("/") so Windows paths work too
    sitk.WriteImage(img_itk_postprocessed, join(out_dir, os.path.basename(fname)))
42
+
43
+
44
def load_niftis_threshold_compute_dice(gt_file, pred_file, thresholds: Tuple[list, tuple]):
    """
    For one gt/pred pair, compute the enhancing tumor (label 3) Dice that would
    result from the BraTS thresholding postprocessing at each candidate
    threshold: if the predicted region is smaller than the threshold it is
    removed, which yields Dice 1 if the reference is also empty and Dice 0
    otherwise; if not removed, the unmodified Dice is used.

    :return: (res_was_smaller, res_dice) — dicts mapping each threshold to
        whether the prediction fell below it and to the resulting Dice
    """
    gt = sitk.GetArrayFromImage(sitk.ReadImage(gt_file))
    pred = sitk.GetArrayFromImage(sitk.ReadImage(pred_file))
    mask_pred = pred == 3
    mask_gt = gt == 3
    num_pred = np.sum(mask_pred)

    num_gt = np.sum(mask_gt)
    dice = dc(mask_pred, mask_gt)

    res_dice = {}
    res_was_smaller = {}

    for t in thresholds:
        was_smaller = False

        if num_pred < t:
            # region would be removed by the postprocessing at this threshold
            was_smaller = True
            if num_gt == 0:
                dice_here = 1.
            else:
                dice_here = 0.
        else:
            dice_here = deepcopy(dice)

        res_dice[t] = dice_here
        res_was_smaller[t] = was_smaller

    return res_was_smaller, res_dice
73
+
74
+
75
def apply_threshold_to_folder(folder_in, folder_out, threshold, replace_with, processes=24):
    """Apply the BraTS enhancing-tumor threshold (see apply_brats_threshold)
    to every nifti in folder_in, writing the results to folder_out."""
    maybe_mkdir_p(folder_out)
    prediction_files = subfiles(folder_in, suffix='.nii.gz', join=True)
    num_files = len(prediction_files)

    pool = Pool(processes)
    pool.starmap(apply_brats_threshold,
                 zip(prediction_files,
                     [folder_out] * num_files,
                     [threshold] * num_files,
                     [replace_with] * num_files))
    pool.close()
    pool.join()
84
+
85
+
86
def determine_brats_postprocessing(folder_with_preds, folder_with_gt, postprocessed_output_dir, processes=8,
                                   thresholds=(0, 10, 50, 100, 200, 500, 750, 1000, 1500, 2500, 10000), replace_with=2):
    """
    Find the enhancing-tumor size threshold that maximizes the mean Dice over
    all cases, apply it to every prediction and save both the postprocessed
    segmentations and the threshold search results (threshold.pkl).

    NOTE(review): gt and prediction files are paired purely by sorted order —
    this assumes both folders contain identically named files; verify upstream.
    """
    # find pairs
    nifti_gt = subfiles(folder_with_gt, suffix=".nii.gz", sort=True)

    p = Pool(processes)

    nifti_pred = subfiles(folder_with_preds, suffix='.nii.gz', sort=True)

    results = p.starmap_async(load_niftis_threshold_compute_dice, zip(nifti_gt, nifti_pred, [thresholds] * len(nifti_pred)))
    results = results.get()

    # mean Dice per candidate threshold, over all cases
    all_dc_per_threshold = {}
    for t in thresholds:
        all_dc_per_threshold[t] = np.array([i[1][t] for i in results])
        print(t, np.mean(all_dc_per_threshold[t]))

    means = [np.mean(all_dc_per_threshold[t]) for t in thresholds]
    best_threshold = thresholds[np.argmax(means)]
    print('best', best_threshold, means[np.argmax(means)])

    maybe_mkdir_p(postprocessed_output_dir)

    # apply the winning threshold to all predictions
    p.starmap(apply_brats_threshold, zip(nifti_pred, [postprocessed_output_dir]*len(nifti_pred), [best_threshold]*len(nifti_pred), [replace_with] * len(nifti_pred)))

    p.close()
    p.join()

    save_pickle((thresholds, means, best_threshold, all_dc_per_threshold), join(postprocessed_output_dir, "threshold.pkl"))
115
+
116
+
117
def collect_and_prepare(base_dir, num_processes = 12, clean=False):
    """
    collect all cv_niftis, compute brats metrics, compute enh tumor thresholds and summarize in csv

    Pipeline over all nnUNetTrainer* experiment folders in base_dir:
    1) collect cross-validation niftis, evaluate regions, determine the best
       enhancing-tumor threshold and evaluate the postprocessed results
    2) write cv_summary.csv / cv_summary2.csv
    3) apply each experiment's threshold to its validation-set predictions
       (plus two hard-coded 15-fold experiments that reuse the 5-fold threshold)
    4) convert everything back to the BraTS label convention and summarize

    :param base_dir: results root for Task082
    :param clean: if True, recompute summaries/thresholds even if files exist
    :return:
    """
    out = join(base_dir, 'cv_results')
    out_pp = join(base_dir, 'cv_results_pp')
    experiments = subfolders(base_dir, join=False, prefix='nnUNetTrainer')
    regions = get_brats_regions()
    gt_dir = join(base_dir, 'gt_niftis')
    replace_with = 2

    failed = []
    successful = []
    for e in experiments:
        print(e)
        try:
            o = join(out, e)
            o_p = join(out_pp, e)
            maybe_mkdir_p(o)
            maybe_mkdir_p(o_p)
            collect_cv_niftis(join(base_dir, e), o)
            if clean or not isfile(join(o, 'summary.csv')):
                evaluate_regions(o, gt_dir, regions, num_processes)
            if clean or not isfile(join(o_p, 'threshold.pkl')):
                determine_brats_postprocessing(o, gt_dir, o_p, num_processes, thresholds=list(np.arange(0, 760, 10)), replace_with=replace_with)
            if clean or not isfile(join(o_p, 'summary.csv')):
                evaluate_regions(o_p, gt_dir, regions, num_processes)
            successful.append(e)
        except Exception as ex:
            # best effort: an experiment with missing folds is reported and skipped
            print("\nERROR\n", e, ex, "\n")
            failed.append(e)

    # we are interested in the mean (nan is 1) column
    with open(join(base_dir, 'cv_summary.csv'), 'w') as f:
        f.write('name,whole,core,enh,mean\n')
        for e in successful:
            expected_nopp = join(out, e, 'summary.csv')
            # NOTE(review): out_pp is absolute, so os.path.join drops `out` here
            # and this resolves to out_pp/e/summary.csv — confirm this is intended
            expected_pp = join(out, out_pp, e, 'summary.csv')
            if isfile(expected_nopp):
                # [-2] selects the penultimate row (the mean row of the summary)
                res = np.loadtxt(expected_nopp, dtype=str, skiprows=0, delimiter=',')[-2]
                as_numeric = [float(i) for i in res[1:]]
                f.write(e + '_noPP,')
                f.write("%0.4f," % as_numeric[0])
                f.write("%0.4f," % as_numeric[1])
                f.write("%0.4f," % as_numeric[2])
                f.write("%0.4f\n" % np.mean(as_numeric))
            if isfile(expected_pp):
                res = np.loadtxt(expected_pp, dtype=str, skiprows=0, delimiter=',')[-2]
                as_numeric = [float(i) for i in res[1:]]
                f.write(e + '_PP,')
                f.write("%0.4f," % as_numeric[0])
                f.write("%0.4f," % as_numeric[1])
                f.write("%0.4f," % as_numeric[2])
                f.write("%0.4f\n" % np.mean(as_numeric))

    # this just crawls the folders and evaluates what it finds
    with open(join(base_dir, 'cv_summary2.csv'), 'w') as f:
        for folder in ['cv_results', 'cv_results_pp']:
            for ex in subdirs(join(base_dir, folder), join=False):
                print(folder, ex)
                expected = join(base_dir, folder, ex, 'summary.csv')
                if clean or not isfile(expected):
                    evaluate_regions(join(base_dir, folder, ex), gt_dir, regions, num_processes)
                if isfile(expected):
                    res = np.loadtxt(expected, dtype=str, skiprows=0, delimiter=',')[-2]
                    as_numeric = [float(i) for i in res[1:]]
                    f.write('%s__%s,' % (folder, ex))
                    f.write("%0.4f," % as_numeric[0])
                    f.write("%0.4f," % as_numeric[1])
                    f.write("%0.4f," % as_numeric[2])
                    f.write("%0.4f\n" % np.mean(as_numeric))

        f.write('name,whole,core,enh,mean\n')
        for e in successful:
            expected_nopp = join(out, e, 'summary.csv')
            # NOTE(review): same join-with-absolute-path behavior as above
            expected_pp = join(out, out_pp, e, 'summary.csv')
            if isfile(expected_nopp):
                res = np.loadtxt(expected_nopp, dtype=str, skiprows=0, delimiter=',')[-2]
                as_numeric = [float(i) for i in res[1:]]
                f.write(e + '_noPP,')
                f.write("%0.4f," % as_numeric[0])
                f.write("%0.4f," % as_numeric[1])
                f.write("%0.4f," % as_numeric[2])
                f.write("%0.4f\n" % np.mean(as_numeric))
            if isfile(expected_pp):
                res = np.loadtxt(expected_pp, dtype=str, skiprows=0, delimiter=',')[-2]
                as_numeric = [float(i) for i in res[1:]]
                f.write(e + '_PP,')
                f.write("%0.4f," % as_numeric[0])
                f.write("%0.4f," % as_numeric[1])
                f.write("%0.4f," % as_numeric[2])
                f.write("%0.4f\n" % np.mean(as_numeric))

    # apply threshold to val set
    expected_num_cases = 125
    missing_valset = []
    has_val_pred = []
    for e in successful:
        if isdir(join(base_dir, 'predVal', e)):
            currdir = join(base_dir, 'predVal', e)
            files = subfiles(currdir, suffix='.nii.gz', join=False)
            if len(files) != expected_num_cases:
                print(e, 'prediction not done, found %d files, expected %s' % (len(files), expected_num_cases))
                continue
            output_folder = join(base_dir, 'predVal_PP', e)
            maybe_mkdir_p(output_folder)
            # threshold.pkl = (thresholds, means, best_threshold, all_dc_per_threshold)
            threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
            if threshold > 1000: threshold = 750  # don't make it too big!
            apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)
            has_val_pred.append(e)
        else:
            print(e, 'has no valset predictions')
            missing_valset.append(e)

    # 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold' needs special treatment:
    # it reuses the threshold determined on the corresponding 5-fold experiment
    e = 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5'
    currdir = join(base_dir, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
    output_folder = join(base_dir, 'predVal_PP', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
    maybe_mkdir_p(output_folder)
    threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
    if threshold > 1000: threshold = 750  # don't make it too big!
    apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)

    # same special treatment for the DA4 15-fold experiment
    e = 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5'
    currdir = join(base_dir, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
    output_folder = join(base_dir, 'predVal_PP', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
    maybe_mkdir_p(output_folder)
    threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
    if threshold > 1000: threshold = 750  # don't make it too big!
    apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)

    # convert val set to brats labels for submission
    output_converted = join(base_dir, 'converted_valSet')

    for source in ['predVal', 'predVal_PP']:
        for e in has_val_pred + ['nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold']:
            expected_source_folder = join(base_dir, source, e)
            if not isdir(expected_source_folder):
                print(e, 'has no', source)
                raise RuntimeError()
            files = subfiles(expected_source_folder, suffix='.nii.gz', join=False)
            if len(files) != expected_num_cases:
                print(e, 'prediction not done, found %d files, expected %s' % (len(files), expected_num_cases))
                continue
            target_folder = join(output_converted, source, e)
            maybe_mkdir_p(target_folder)
            convert_labels_back_to_BraTS_2018_2019_convention(expected_source_folder, target_folder)

    summarize_validation_set_predictions(output_converted)
269
+
270
+
271
def summarize_validation_set_predictions(base):
    """
    Collect the CBICA online-evaluation csvs (Stats_Validation_final.csv) from
    all converted validation-set prediction folders and write one summary.csv
    with Dice and HD95 per experiment, reordered to whole/core/enhancing.
    """
    with open(join(base, 'summary.csv'), 'w') as f:
        f.write('name,whole,core,enh,mean,whole,core,enh,mean\n')
        for subf in subfolders(base, join=False):
            for e in subfolders(join(base, subf), join=False):
                expected = join(base, subf, e, 'Stats_Validation_final.csv')
                if not isfile(expected):
                    print(subf, e, 'has missing csv')
                    continue
                a = np.loadtxt(expected, delimiter=',', dtype=str)
                # NOTE(review): 131 rows presumably = 125 cases + header +
                # aggregate rows; the assert message says 125 — confirm
                assert a.shape[0] == 131, 'did not evaluate all 125 cases!'
                # [-5] appears to select the 'Mean' aggregate row — confirm
                selected_row = a[-5]
                # columns 1:4 are Dice; the csv order seems to be ET, WT, TC,
                # hence the [1], [2], [0] reordering to whole, core, enh
                values = [float(i) for i in selected_row[1:4]]
                f.write(e + "_" + subf + ',')
                f.write("%0.4f," % values[1])
                f.write("%0.4f," % values[2])
                f.write("%0.4f," % values[0])
                f.write("%0.4f," % np.mean(values))
                # last three columns are the HD95 values, same reordering
                values = [float(i) for i in selected_row[-3:]]
                f.write("%0.4f," % values[1])
                f.write("%0.4f," % values[2])
                f.write("%0.4f," % values[0])
                f.write("%0.4f\n" % np.mean(values))
294
+
295
+
296
def compute_BraTS_dice(ref, pred):
    """
    Dice coefficient with the BraTS convention for empty reference regions:
    an empty reference scores 1 against an empty prediction and 0 against a
    non-empty one. `ref` and `pred` are binary integer numpy arrays.
    """
    reference_is_empty = np.sum(ref) == 0
    if reference_is_empty:
        prediction_is_empty = np.sum(pred) == 0
        return 1 if prediction_is_empty else 0
    return dc(pred, ref)
313
+
314
+
315
def convert_all_to_BraTS(input_folder, output_folder, expected_num_cases=125):
    """Convert every complete prediction subfolder of input_folder back to the
    BraTS label convention; incomplete folders are reported and skipped."""
    for subfolder in subdirs(input_folder, join=False):
        case_files = subfiles(join(input_folder, subfolder), suffix='.nii.gz', join=False)
        if len(case_files) != expected_num_cases:
            # incomplete prediction folder -> report and skip
            print(subfolder)
            continue
        target_dir = join(output_folder, subfolder)
        convert_labels_back_to_BraTS_2018_2019_convention(join(input_folder, subfolder), target_dir, num_processes=6)
323
+
324
+
325
def compute_BraTS_HD95(ref, pred):
    """
    95th percentile Hausdorff distance with BraTS conventions for empty masks:
    both empty -> 0 (perfect); exactly one empty -> 373.12866 (the penalty used
    by the BraTS evaluation). Spacing is assumed to be (1, 1, 1).
    `ref` and `pred` are binary integer numpy arrays.
    """
    ref_is_empty = np.sum(ref) == 0
    pred_is_empty = np.sum(pred) == 0

    if ref_is_empty and pred_is_empty:
        return 0
    if ref_is_empty or pred_is_empty:
        return 373.12866
    return hd95(pred, ref, (1, 1, 1))
345
+
346
+
347
def evaluate_BraTS_case(arr: np.ndarray, arr_gt: np.ndarray):
    """
    Reimplementation of the BraTS evaluation scheme for one case.
    Label convention: edema=1, non_enh=2, enh=3. The three evaluated regions
    are whole tumor (any label), tumor core (labels > 1) and enhancing tumor
    (label == 3).

    :param arr: predicted segmentation
    :param arr_gt: reference segmentation
    :return: (dc_whole, dc_core, dc_enh, hd95_whole, hd95_core, hd95_enh)
    """
    region_predicates = (
        lambda x: x != 0,   # whole tumor
        lambda x: x > 1,    # tumor core
        lambda x: x == 3,   # enhancing tumor
    )
    dice_scores = []
    hd95_scores = []
    for predicate in region_predicates:
        mask_gt = predicate(arr_gt).astype(int)
        mask_pred = predicate(arr).astype(int)
        dice_scores.append(compute_BraTS_dice(mask_gt, mask_pred))
        hd95_scores.append(compute_BraTS_HD95(mask_gt, mask_pred))

    return dice_scores[0], dice_scores[1], dice_scores[2], hd95_scores[0], hd95_scores[1], hd95_scores[2]
377
+
378
+
379
def load_evaluate(filename_gt: str, filename_pred: str):
    """Load a gt/pred nifti pair and return the BraTS metrics for the case
    (see evaluate_BraTS_case)."""
    pred_array = sitk.GetArrayFromImage(sitk.ReadImage(filename_pred))
    gt_array = sitk.GetArrayFromImage(sitk.ReadImage(filename_gt))
    return evaluate_BraTS_case(pred_array, gt_array)
383
+
384
+
385
def evaluate_BraTS_folder(folder_pred, folder_gt, num_processes: int = 24, strict=False):
    """
    Evaluate every predicted nifti in folder_pred against the same-named
    reference in folder_gt using the BraTS metrics (Dice and HD95 for
    whole/core/enhancing) and write per-case results to folder_pred/results.csv.

    :param strict: if True, additionally require a prediction for every gt file
    """
    nii_pred = subfiles(folder_pred, suffix='.nii.gz', join=False)
    if len(nii_pred) == 0:
        return
    nii_gt = subfiles(folder_gt, suffix='.nii.gz', join=False)
    assert all([i in nii_gt for i in nii_pred]), 'not all predicted niftis have a reference file!'
    if strict:
        assert all([i in nii_pred for i in nii_gt]), 'not all gt niftis have a predicted file!'
    p = Pool(num_processes)
    nii_pred_fullpath = [join(folder_pred, i) for i in nii_pred]
    # gt paths follow the prediction file names (verified to exist above)
    nii_gt_fullpath = [join(folder_gt, i) for i in nii_pred]
    results = p.starmap(load_evaluate, zip(nii_gt_fullpath, nii_pred_fullpath))
    # the pool was previously never closed -> worker processes leaked
    p.close()
    p.join()
    # now write to output file
    with open(join(folder_pred, 'results.csv'), 'w') as f:
        f.write("name,dc_whole,dc_core,dc_enh,hd95_whole,hd95_core,hd95_enh\n")
        for fname, r in zip(nii_pred, results):
            f.write(fname)
            f.write(",%0.4f,%0.4f,%0.4f,%3.3f,%3.3f,%3.3f\n" % r)
403
+
404
+
405
def load_csv_for_ranking(csv_file: str):
    """
    Load a results.csv (as written by evaluate_BraTS_folder) and return a
    score matrix where larger is always better: the three Dice columns are
    kept as-is and the three HD95 columns are flipped via (373.129 - hd95).

    :return: float array of shape (num_cases, 6)
    """
    raw = np.loadtxt(csv_file, dtype='str', delimiter=',')
    # skip the header row; pick dice (cols 1-3) and hd95 (last three) columns
    scores = raw[1:, [1, 2, 3, -3, -2, -1]].astype(float)
    # invert the HD95 columns so that higher = better
    scores[:, -3:] = 373.129 - scores[:, -3:]
    assert np.all(scores <= 373.129)
    assert np.all(scores >= 0)
    return scores
413
+
414
+
415
def rank_algorithms(data:np.ndarray):
    """
    Rank experiments the way the BraTS challenge does.

    data is (metrics x experiments x cases), where larger values are better
    (HD95 must already be inverted, see load_csv_for_ranking). For each metric
    and each case, experiments are ranked (rank 1 = best, ties receive the
    minimum rank); each experiment's ranks are averaged over cases and metrics,
    and the averages are ranked again to obtain the final ranking.

    :param data:
    :return: (final_ranks, average_rank, ranks) with ranks of shape
        (metrics x experiments)
    """
    num_metrics, num_experiments, num_cases = data.shape
    ranks = np.zeros((num_metrics, num_experiments))
    # iterate over ALL metrics (this was previously hard-coded to range(6),
    # which broke for any other number of metrics)
    for m in range(num_metrics):
        # negate so that the best (largest) score receives rank 1
        r = np.apply_along_axis(ss.rankdata, 0, -data[m], 'min')
        ranks[m] = r.mean(1)
    average_rank = np.mean(ranks, 0)
    final_ranks = ss.rankdata(average_rank, 'min')
    return final_ranks, average_rank, ranks
429
+
430
+
431
def score_and_postprocess_model_based_on_rank_then_aggregate():
    """
    Similarly to BraTS 2017 - BraTS 2019, each participant will be ranked for each of the X test cases. Each case
    includes 3 regions of evaluation, and the metrics used to produce the rankings will be the Dice Similarity
    Coefficient and the 95% Hausdorff distance. Thus, for X number of cases included in the BraTS 2020, each
    participant ends up having X*3*2 rankings. The final ranking score is the average of all these rankings normalized
    by the number of teams.
    https://zenodo.org/record/3718904

    -> let's optimize for this.

    Important: the outcome very much depends on the competing models. We need some references. We only got our own,
    so let's hope this still works
    :return:
    """
    # NOTE(review): all paths below are hard coded to the original author's machine
    base = "/media/fabian/Results/nnUNet/3d_fullres/Task082_BraTS2020"
    replace_with = 2    # label that small enhancing-tumor components are replaced with
    num_processes = 24
    expected_num_cases_val = 125

    # use a separate output folder from the previous experiments to ensure we are not messing things up
    output_base_here = join(base, 'use_brats_ranking')
    maybe_mkdir_p(output_base_here)

    # collect cv niftis and compute metrics with evaluate_BraTS_folder to ensure we work with the same metrics as brats
    out = join(output_base_here, 'cv_results')
    experiments = subfolders(base, join=False, prefix='nnUNetTrainer')
    gt_dir = join(base, 'gt_niftis')

    experiments_with_full_cv = []
    for e in experiments:
        print(e)
        o = join(out, e)
        maybe_mkdir_p(o)
        try:
            collect_cv_niftis(join(base, e), o)
            if not isfile(join(o, 'results.csv')):
                evaluate_BraTS_folder(o, gt_dir, num_processes, strict=True)
            experiments_with_full_cv.append(e)
        except Exception as ex:
            # experiments with incomplete cross-validation are skipped; remove any
            # partial results file so a later rerun recomputes it
            print("\nERROR\n", e, ex, "\n")
            if isfile(join(o, 'results.csv')):
                os.remove(join(o, 'results.csv'))

    # rank the non-postprocessed models
    tmp = np.loadtxt(join(out, experiments_with_full_cv[0], 'results.csv'), dtype='str', delimiter=',')
    num_cases = len(tmp) - 1  # minus the header row
    data_for_ranking = np.zeros((6, len(experiments_with_full_cv), num_cases))
    for i, e in enumerate(experiments_with_full_cv):
        scores = load_csv_for_ranking(join(out, e, 'results.csv'))
        for metric in range(6):
            data_for_ranking[metric, i] = scores[:, metric]

    final_ranks, average_rank, ranks = rank_algorithms(data_for_ranking)

    for t in np.argsort(final_ranks):
        print(final_ranks[t], average_rank[t], experiments_with_full_cv[t])

    # for each model, create output directories with different thresholds. evaluate ALL OF THEM (might take a while lol)
    thresholds = np.arange(25, 751, 25)  # minimum enhancing-tumor size candidates (voxels)
    output_pp_tmp = join(output_base_here, 'cv_determine_pp_thresholds')
    for e in experiments_with_full_cv:
        input_folder = join(out, e)
        for t in thresholds:
            output_directory = join(output_pp_tmp, e, str(t))
            maybe_mkdir_p(output_directory)
            if not isfile(join(output_directory, 'results.csv')):
                apply_threshold_to_folder(input_folder, output_directory, t, replace_with, processes=16)
                evaluate_BraTS_folder(output_directory, gt_dir, num_processes)

    # load ALL the results!
    results = []
    experiment_names = []
    for e in experiments_with_full_cv:
        for t in thresholds:
            output_directory = join(output_pp_tmp, e, str(t))
            expected_file = join(output_directory, 'results.csv')
            if not isfile(expected_file):
                print(e, 'does not have a results file for threshold', t)
                continue
            results.append(load_csv_for_ranking(expected_file))
            # postprocessed variants are named "<experiment>___<threshold>"
            experiment_names.append("%s___%d" % (e, t))
    all_results = np.concatenate([i[None] for i in results], 0).transpose((2, 0, 1))

    # concatenate with non postprocessed models
    all_results = np.concatenate((data_for_ranking, all_results), 1)
    experiment_names += experiments_with_full_cv

    final_ranks, average_rank, ranks = rank_algorithms(all_results)

    for t in np.argsort(final_ranks):
        print(final_ranks[t], average_rank[t], experiment_names[t])

    # for each model, print the non postprocessed model as well as the best postprocessed model. If there are
    # validation set predictions, apply the best threshold to the validation set
    pred_val_base = join(base, 'predVal_PP_rank')
    has_val_pred = []
    for e in experiments_with_full_cv:
        rank_nonpp = final_ranks[experiment_names.index(e)]
        avg_rank_nonpp = average_rank[experiment_names.index(e)]
        print(e, avg_rank_nonpp, rank_nonpp)
        predicted_val = join(base, 'predVal', e)

        pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
        if len(pp_models) > 0:
            # NOTE(review): 'ranks' from rank_algorithms is shadowed here by a plain list
            ranks = [final_ranks[i] for i in pp_models]
            best_idx = np.argmin(ranks)
            best = experiment_names[pp_models[best_idx]]
            best_avg_rank = average_rank[pp_models[best_idx]]
            print(best, best_avg_rank, min(ranks))
            print('')
            # apply threshold to validation set
            best_threshold = int(best.split('___')[-1])
            if not isdir(predicted_val):
                print(e, 'has not valset predictions')
            else:
                files = subfiles(predicted_val, suffix='.nii.gz')
                if len(files) != expected_num_cases_val:
                    print(e, 'has missing val cases. found: %d expected: %d' % (len(files), expected_num_cases_val))
                else:
                    apply_threshold_to_folder(predicted_val, join(pred_val_base, e), best_threshold, replace_with, num_processes)
                    has_val_pred.append(e)
        else:
            print(e, 'not found in ranking')

    # apply nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5 to nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold
    # (the 15-fold variant has no own cv-derived threshold, so reuse the 5-fold one)
    e = 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5'
    pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
    ranks = [final_ranks[i] for i in pp_models]
    best_idx = np.argmin(ranks)
    best = experiment_names[pp_models[best_idx]]
    best_avg_rank = average_rank[pp_models[best_idx]]
    best_threshold = int(best.split('___')[-1])
    predicted_val = join(base, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
    apply_threshold_to_folder(predicted_val, join(pred_val_base, 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold'), best_threshold, replace_with, num_processes)
    has_val_pred.append('nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')

    # apply nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5 to nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold
    e = 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5'
    pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
    ranks = [final_ranks[i] for i in pp_models]
    best_idx = np.argmin(ranks)
    best = experiment_names[pp_models[best_idx]]
    best_avg_rank = average_rank[pp_models[best_idx]]
    best_threshold = int(best.split('___')[-1])
    predicted_val = join(base, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
    apply_threshold_to_folder(predicted_val, join(pred_val_base, 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold'), best_threshold, replace_with, num_processes)
    has_val_pred.append('nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')

    # convert valsets back to the official BraTS label convention for submission
    output_converted = join(base, 'converted_valSet')
    for e in has_val_pred:
        expected_source_folder = join(base, 'predVal_PP_rank', e)
        if not isdir(expected_source_folder):
            print(e, 'has no predVal_PP_rank')
            raise RuntimeError()
        files = subfiles(expected_source_folder, suffix='.nii.gz', join=False)
        if len(files) != expected_num_cases_val:
            print(e, 'prediction not done, found %d files, expected %s' % (len(files), expected_num_cases_val))
            continue
        target_folder = join(output_converted, 'predVal_PP_rank', e)
        maybe_mkdir_p(target_folder)
        convert_labels_back_to_BraTS_2018_2019_convention(expected_source_folder, target_folder)

    # now load all the csvs for the validation set (obtained from evaluation platform) and rank our models on the
    # validation set
    flds = subdirs(output_converted, join=False)
    results_valset = []
    names_valset = []
    for f in flds:
        curr = join(output_converted, f)
        experiments = subdirs(curr, join=False)
        for e in experiments:
            currr = join(curr, e)
            expected_file = join(currr, 'Stats_Validation_final.csv')
            if not isfile(expected_file):
                print(f, e, "has not been evaluated yet!")
            else:
                # the last 5 rows of the platform csv are aggregate statistics, drop them
                res = load_csv_for_ranking(expected_file)[:-5]
                assert res.shape[0] == expected_num_cases_val
                results_valset.append(res[None])
                names_valset.append("%s___%s" % (f, e))
    results_valset = np.concatenate(results_valset, 0)  # experiments x cases x metrics
    # convert to metrics x experiments x cases
    results_valset = results_valset.transpose((2, 0, 1))
    final_ranks, average_rank, ranks = rank_algorithms(results_valset)
    for t in np.argsort(final_ranks):
        print(final_ranks[t], average_rank[t], names_valset[t])
619
+
620
+
621
if __name__ == "__main__":
    """
    THIS CODE IS A MESS. IT IS PROVIDED AS IS WITH NO GUARANTEES. YOU HAVE TO DIG THROUGH IT YOURSELF. GOOD LUCK ;-)

    REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
    """

    task_name = "Task082_BraTS2020"
    # NOTE(review): download locations are hard coded to the original author's machine
    downloaded_data_dir = "/home/fabian/Downloads/MICCAI_BraTS2020_TrainingData"
    downloaded_data_dir_val = "/home/fabian/Downloads/MICCAI_BraTS2020_ValidationData"

    # build the standard nnU-Net raw-data folder layout for this task
    target_base = join(nnUNet_raw_data, task_name)
    target_imagesTr = join(target_base, "imagesTr")
    target_imagesVal = join(target_base, "imagesVal")
    target_imagesTs = join(target_base, "imagesTs")
    target_labelsTr = join(target_base, "labelsTr")

    maybe_mkdir_p(target_imagesTr)
    maybe_mkdir_p(target_imagesVal)
    maybe_mkdir_p(target_imagesTs)
    maybe_mkdir_p(target_labelsTr)

    # training data: each patient folder holds t1, t1ce, t2, flair and the segmentation
    patient_names = []
    cur = join(downloaded_data_dir)
    for p in subdirs(cur, join=False):
        patdir = join(cur, p)
        patient_name = p
        patient_names.append(patient_name)
        t1 = join(patdir, p + "_t1.nii.gz")
        t1c = join(patdir, p + "_t1ce.nii.gz")
        t2 = join(patdir, p + "_t2.nii.gz")
        flair = join(patdir, p + "_flair.nii.gz")
        seg = join(patdir, p + "_seg.nii.gz")

        assert all([
            isfile(t1),
            isfile(t1c),
            isfile(t2),
            isfile(flair),
            isfile(seg)
        ]), "%s" % patient_name

        # modalities are stored as separate _000X channels in nnU-Net convention
        shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
        shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
        shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
        shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))

        # converts BraTS labels (0, 1, 2, 4) into consecutive labels for training
        copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))

    # write the dataset.json that nnU-Net requires
    json_dict = OrderedDict()
    json_dict['name'] = "BraTS2020"
    json_dict['description'] = "nothing"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see BraTS2020"
    json_dict['licence'] = "see BraTS2020 license"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "T1",
        "1": "T1ce",
        "2": "T2",
        "3": "FLAIR"
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "edema",
        "2": "non-enhancing",
        "3": "enhancing",
    }
    json_dict['numTraining'] = len(patient_names)
    json_dict['numTest'] = 0
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
                             patient_names]
    json_dict['test'] = []

    save_json(json_dict, join(target_base, "dataset.json"))

    # validation set: images only, no segmentations available
    if downloaded_data_dir_val is not None:
        for p in subdirs(downloaded_data_dir_val, join=False):
            patdir = join(downloaded_data_dir_val, p)
            patient_name = p
            t1 = join(patdir, p + "_t1.nii.gz")
            t1c = join(patdir, p + "_t1ce.nii.gz")
            t2 = join(patdir, p + "_t2.nii.gz")
            flair = join(patdir, p + "_flair.nii.gz")

            assert all([
                isfile(t1),
                isfile(t1c),
                isfile(t2),
                isfile(flair),
            ]), "%s" % patient_name

            shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
            shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
            shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
            shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))

    downloaded_data_dir_test = "/home/fabian/Downloads/MICCAI_BraTS2020_TestingData"

    # test set: images only, only processed if the download exists
    if isdir(downloaded_data_dir_test):
        for p in subdirs(downloaded_data_dir_test, join=False):
            patdir = join(downloaded_data_dir_test, p)
            patient_name = p
            t1 = join(patdir, p + "_t1.nii.gz")
            t1c = join(patdir, p + "_t1ce.nii.gz")
            t2 = join(patdir, p + "_t2.nii.gz")
            flair = join(patdir, p + "_flair.nii.gz")

            assert all([
                isfile(t1),
                isfile(t1c),
                isfile(t2),
                isfile(flair),
            ]), "%s" % patient_name

            shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
            shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
            shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
            shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))

    # test set
    # nnUNet_ensemble -f nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold -o ensembled_nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold
    # apply_threshold_to_folder('ensembled_nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold/', 'ensemble_PP200/', 200, 2)
    # convert_labels_back_to_BraTS_2018_2019_convention('ensemble_PP200/', 'ensemble_PP200_converted')

    # export for publication of weights
    # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA4_BN -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 -t 82 -o nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold.zip --disable_strict
    # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA3_BN_BD -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 -t 82 -o nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold.zip --disable_strict
    # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA4_BN_BD -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 -t 82 -o nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold.zip --disable_strict
nnunet/dataset_conversion/Task083_VerSe2020.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import shutil
17
+ from collections import OrderedDict
18
+ from copy import deepcopy
19
+ from multiprocessing.pool import Pool
20
+
21
+ from batchgenerators.utilities.file_and_folder_operations import *
22
+ from nnunet.dataset_conversion.Task056_VerSe2019 import check_if_all_in_good_orientation, \
23
+ print_unique_labels_and_their_volumes
24
+ from nnunet.paths import nnUNet_raw_data, preprocessing_output_dir
25
+ from nnunet.utilities.image_reorientation import reorient_all_images_in_folder_to_ras
26
+
27
+
28
def manually_change_plans():
    """Patch the auto-generated nnU-Net plans for Task083_VerSe2020 with a larger
    patch size and a deeper pooling scheme.

    Loads nnUNetPlansv2.1_plans_3D.pkl, overwrites the network topology of every
    stage (lowres and fullres alike) and saves the result as custom_plans_3D.pkl.
    """
    plans_dir = join(preprocessing_output_dir, "Task083_VerSe2020")
    plans_file = join(plans_dir, "nnUNetPlansv2.1_plans_3D.pkl")
    assert isfile(plans_file)
    plans = load_pickle(plans_file)

    # change the network topology for all stages
    modified = deepcopy(plans)
    for stage in range(len(modified['plans_per_stage'])):
        stage_plan = modified['plans_per_stage'][stage]
        stage_plan['patch_size'] = (224, 160, 160)
        # five 2x2x2 poolings -> bottleneck of 7x5x5
        stage_plan['pool_op_kernel_sizes'] = [[2, 2, 2] for _ in range(5)]
        stage_plan['conv_kernel_sizes'] = [[3, 3, 3] for _ in range(6)]
    save_pickle(modified, join(plans_dir, "custom_plans_3D.pkl"))
51
+
52
+
53
if __name__ == "__main__":
    ### First we create a nnunet dataset from verse. After this the images will be all willy nilly in their
    # orientation because that's how VerSe comes
    base = '/home/fabian/Downloads/osfstorage-archive/'

    task_id = 83
    task_name = "VerSe2020"

    foldername = "Task%03.0d_%s" % (task_id, task_name)

    # standard nnU-Net raw-data folder layout
    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    maybe_mkdir_p(imagestr)
    maybe_mkdir_p(imagests)
    maybe_mkdir_p(labelstr)

    train_patient_names = []

    # VerSe 2020 ships its training data in several subfolders; a case is identified
    # by the presence of a *_seg.nii.gz file next to the image
    for t in subdirs(join(base, 'training_data'), join=False):
        train_patient_names_here = [i[:-len("_seg.nii.gz")] for i in
                                    subfiles(join(base, "training_data", t), join=False, suffix="_seg.nii.gz")]
        for p in train_patient_names_here:
            curr = join(base, "training_data", t)
            label_file = join(curr, p + "_seg.nii.gz")
            image_file = join(curr, p + ".nii.gz")
            shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
            shutil.copy(label_file, join(labelstr, p + ".nii.gz"))

        train_patient_names += train_patient_names_here

    # dataset.json as required by nnU-Net
    json_dict = OrderedDict()
    json_dict['name'] = "VerSe2020"
    json_dict['description'] = "VerSe2020"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see challenge website"
    json_dict['licence'] = "see challenge website"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "CT",
    }
    # 0 = background, 1-28 = vertebra labels
    json_dict['labels'] = {i: str(i) for i in range(29)}

    json_dict['numTraining'] = len(train_patient_names)
    json_dict['numTest'] = []
    json_dict['training'] = [
        {'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i
        in
        train_patient_names]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in []]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))

    # now we reorient all those images to ras. This saves a pkl with the original affine. We need this information to
    # bring our predictions into the same geometry for submission
    reorient_all_images_in_folder_to_ras(imagestr, 16)
    reorient_all_images_in_folder_to_ras(imagests, 16)
    reorient_all_images_in_folder_to_ras(labelstr, 16)

    # sanity check
    check_if_all_in_good_orientation(imagestr, labelstr, join(out_base, 'sanitycheck'))
    # looks good to me - proceed

    # check the volumes of the vertebrae
    p = Pool(6)
    _ = p.starmap(print_unique_labels_and_their_volumes, zip(subfiles(labelstr, suffix='.nii.gz'), [1000] * 113))

    # looks good

    # Now we are ready to run nnU-Net

    """# run this part of the code once training is done
    folder_gt = "/media/fabian/My Book/MedicalDecathlon/nnUNet_raw_splitted/Task056_VerSe/labelsTr"

    folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_fullres/Task056_VerSe/nnUNetTrainerV2__nnUNetPlansv2.1/cv_niftis_raw"
    out_json = "/home/fabian/Task056_VerSe_3d_fullres_summary.json"
    evaluate_verse_folder(folder_pred, folder_gt, out_json)

    folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_lowres/Task056_VerSe/nnUNetTrainerV2__nnUNetPlansv2.1/cv_niftis_raw"
    out_json = "/home/fabian/Task056_VerSe_3d_lowres_summary.json"
    evaluate_verse_folder(folder_pred, folder_gt, out_json)

    folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_cascade_fullres/Task056_VerSe/nnUNetTrainerV2CascadeFullRes__nnUNetPlansv2.1/cv_niftis_raw"
    out_json = "/home/fabian/Task056_VerSe_3d_cascade_fullres_summary.json"
    evaluate_verse_folder(folder_pred, folder_gt, out_json)"""
nnunet/dataset_conversion/Task089_Fluo-N2DH-SIM.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import shutil
16
+ from multiprocessing import Pool
17
+
18
+ import SimpleITK as sitk
19
+ import numpy as np
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+ from skimage.io import imread
22
+ from skimage.io import imsave
23
+ from skimage.morphology import disk
24
+ from skimage.morphology import erosion
25
+ from skimage.transform import resize
26
+
27
+ from nnunet.paths import nnUNet_raw_data
28
+
29
+
30
def load_bmp_convert_to_nifti_borders_2d(img_file, lab_file, img_out_base, anno_out, spacing, border_thickness=0.7):
    """Convert one 2d tif image (and optionally its instance label map) to pseudo-3d nifti.

    The image is saved as <img_out_base>_0000.nii.gz with a dummy z-spacing of 999.
    If lab_file is given, a three-class annotation (0 background, 1 cell, 2 border)
    is derived from the instance labels and written to anno_out.
    """
    raw = imread(img_file)
    raw_itk = sitk.GetImageFromArray(raw.astype(np.float32)[None])
    raw_itk.SetSpacing(list(spacing)[::-1] + [999])
    sitk.WriteImage(raw_itk, join(img_out_base + "_0000.nii.gz"))

    if lab_file is None:
        return

    seg = imread(lab_file)
    border_mask = generate_border_as_suggested_by_twollmann_2d(seg, spacing, border_thickness)
    seg[seg > 0] = 1           # collapse all instances into a single foreground class
    seg[border_mask == 1] = 2  # border voxels overwrite the foreground class
    seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8)[None])
    seg_itk.SetSpacing(list(spacing)[::-1] + [999])
    sitk.WriteImage(seg_itk, anno_out)
44
+
45
+
46
def generate_disk(spacing, radius, dtype=int):
    """Build a binary 2d disk-shaped structuring element with the given physical
    radius for anisotropic pixel spacing, by resampling an isotropic disk onto the
    anisotropic grid."""
    voxel_radius = np.round(radius / np.array(spacing)).astype(int)
    target_shape = 2 * voxel_radius + 1
    # start from a generously sized isotropic disk and squeeze it to the target shape
    iso_disk = disk(max(target_shape) * 2, dtype=np.float64)
    resampled = resize(iso_disk, target_shape, 1, 'constant', 0, clip=True, anti_aliasing=False, preserve_range=True)
    # re-binarize after interpolation
    binary = np.where(resampled > 0.5, 1.0, 0.0)
    return binary.astype(dtype)
54
+
55
+
56
def generate_border_as_suggested_by_twollmann_2d(label_img: np.ndarray, spacing,
                                                 border_thickness: float = 2) -> np.ndarray:
    """Return a binary mask marking, for every instance in label_img, the rim of
    pixels that vanishes under erosion with a disk of the given physical thickness
    (border scheme from Twollmann et al.)."""
    selem = generate_disk(spacing, border_thickness)
    border = np.zeros_like(label_img)
    for instance_id in np.unique(label_img):
        if instance_id == 0:
            continue  # background has no border
        instance_mask = (label_img == instance_id).astype(int)
        shrunk = erosion(instance_mask, selem)
        border[(shrunk == 0) & (instance_mask != 0)] = 1
    return border
66
+
67
+
68
def prepare_task(base, task_id, task_name, spacing, border_thickness: float = 15):
    """Convert a Cell Tracking Challenge style dataset (folders <base>_train and
    <base>_test with tif frames and *_GT/SEG annotations) into an nnU-Net raw-data
    task, generating border labels with the given physical thickness."""
    p = Pool(16)

    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    maybe_mkdir_p(imagestr)
    maybe_mkdir_p(imagests)
    maybe_mkdir_p(labelstr)

    train_patient_names = []
    test_patient_names = []
    res = []  # async handles of the conversion jobs

    # training sequences: folders not ending in _GT; only frames with a manual
    # segmentation (man_seg*) are used
    for train_sequence in [i for i in subfolders(base + "_train", join=False) if not i.endswith("_GT")]:
        train_cases = subfiles(join(base + '_train', train_sequence), suffix=".tif", join=False)
        for t in train_cases:
            casename = train_sequence + "_" + t[:-4]
            img_file = join(base + '_train', train_sequence, t)
            lab_file = join(base + '_train', train_sequence + "_GT", "SEG", "man_seg" + t[1:])
            if not isfile(lab_file):
                continue
            img_out_base = join(imagestr, casename)
            anno_out = join(labelstr, casename + ".nii.gz")
            res.append(
                p.starmap_async(load_bmp_convert_to_nifti_borders_2d,
                                ((img_file, lab_file, img_out_base, anno_out, spacing, border_thickness),)))
            train_patient_names.append(casename)

    # test sequences: images only, no annotations
    for test_sequence in [i for i in subfolders(base + "_test", join=False) if not i.endswith("_GT")]:
        test_cases = subfiles(join(base + '_test', test_sequence), suffix=".tif", join=False)
        for t in test_cases:
            casename = test_sequence + "_" + t[:-4]
            img_file = join(base + '_test', test_sequence, t)
            lab_file = None
            img_out_base = join(imagests, casename)
            anno_out = None
            res.append(
                p.starmap_async(load_bmp_convert_to_nifti_borders_2d,
                                ((img_file, lab_file, img_out_base, anno_out, spacing, border_thickness),)))
            test_patient_names.append(casename)

    # wait for all conversion jobs to finish
    _ = [i.get() for i in res]

    json_dict = {}
    json_dict['name'] = task_name
    json_dict['description'] = ""
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = ""
    json_dict['licence'] = ""
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "BF",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "cell",
        "2": "border",
    }

    json_dict['numTraining'] = len(train_patient_names)
    json_dict['numTest'] = len(test_patient_names)
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
                             train_patient_names]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_patient_names]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))
    p.close()
    p.join()
140
+
141
+
142
def convert_to_instance_seg(arr: np.ndarray, spacing: tuple = (0.125, 0.125), small_center_threshold: int = 30,
                            isolated_border_as_separate_instance_threshold=15):
    """Convert a 2d semantic cell/border segmentation (0 background, 1 cell core,
    2 border) back into an instance segmentation by growing the cores into the
    border in physical-space increments.

    :param arr: 2d array with labels {0, 1, 2}; modified in place
    :param spacing: pixel spacing, used to keep the dilation isotropic in mm
    :param small_center_threshold: cores with at most this many pixels are demoted to border
    :param isolated_border_as_separate_instance_threshold: minimum size for a
        border-only object to become its own instance
    :return: uint32 instance label map
    """
    from skimage.morphology import label, dilation

    # we first identify centers that are too small and set them to be border. This should remove false positive instances
    objects = label((arr == 1).astype(int))
    for o in np.unique(objects):
        if o > 0 and np.sum(objects == o) <= small_center_threshold:
            arr[objects == o] = 2

    # 1 is core, 2 is border
    objects = label((arr == 1).astype(int))
    final = np.copy(objects)
    remaining_border = arr == 2
    current = np.copy(objects)
    dilated_mm = np.array((0, 0))  # how far (in mm) each axis has been dilated so far
    spacing = np.array(spacing)

    # grow the instances into the border; per iteration only the axes that lag
    # behind in physical distance are dilated, to keep the growth isotropic in mm
    while np.sum(remaining_border) > 0:
        strel_size = [0, 0]
        maximum_dilation = max(dilated_mm)
        for i in range(2):
            if spacing[i] == min(spacing):
                strel_size[i] = 1
                continue
            if dilated_mm[i] + spacing[i] / 2 < maximum_dilation:
                strel_size[i] = 1
        ball_here = disk(1)

        # collapse the structuring element along axes that should not grow this round
        if strel_size[0] == 0: ball_here = ball_here[1:2]
        if strel_size[1] == 0: ball_here = ball_here[:, 1:2]

        #print(1)
        dilated = dilation(current, ball_here)
        diff = (current == 0) & (dilated != current)
        # newly reached border pixels take the label of the instance that reached them
        final[diff & remaining_border] = dilated[diff & remaining_border]
        remaining_border[diff] = 0
        current = dilated
        dilated_mm = [dilated_mm[i] + spacing[i] if strel_size[i] == 1 else dilated_mm[i] for i in range(2)]

    # what can happen is that a cell is so small that the network only predicted border and no core. This cell will be
    # fused with the nearest other instance, which we don't want. Therefore we identify isolated border predictions and
    # give them a separate instance id
    # we identify isolated border predictions by checking each foreground object in arr and see whether this object
    # also contains label 1
    max_label = np.max(final)

    foreground_objects = label((arr != 0).astype(int))
    for i in np.unique(foreground_objects):
        if i > 0 and (1 not in np.unique(arr[foreground_objects==i])):
            size_of_object = np.sum(foreground_objects==i)
            if size_of_object >= isolated_border_as_separate_instance_threshold:
                final[foreground_objects == i] = max_label + 1
                max_label += 1
                #print('yeah boi')

    return final.astype(np.uint32)
199
+
200
+
201
def load_convert_to_instance_save(file_in: str, file_out: str, spacing):
    """Read a semantic (cell/border) nifti, convert it to an instance segmentation
    and save the result with the geometry of the input image."""
    semantic_img = sitk.ReadImage(file_in)
    semantic_arr = sitk.GetArrayFromImage(semantic_img)
    instance_arr = convert_to_instance_seg(semantic_arr[0], spacing)[None]
    instance_img = sitk.GetImageFromArray(instance_arr.astype(np.int16))
    instance_img.CopyInformation(semantic_img)
    sitk.WriteImage(instance_img, file_out)
208
+
209
+
210
def convert_folder_to_instanceseg(folder_in: str, folder_out: str, spacing, processes: int = 12):
    """Run load_convert_to_instance_save on every .nii.gz in folder_in, writing the
    instance segmentations under the same file names into folder_out."""
    names = subfiles(folder_in, suffix=".nii.gz", join=False)
    maybe_mkdir_p(folder_out)
    sources = [join(folder_in, n) for n in names]
    targets = [join(folder_out, n) for n in names]
    pool = Pool(processes)
    async_results = [
        pool.starmap_async(load_convert_to_instance_save, ((src, dst, spacing),))
        for src, dst in zip(sources, targets)
    ]
    _ = [a.get() for a in async_results]
    pool.close()
    pool.join()
227
+
228
+
229
def convert_to_tiff(nifti_image: str, output_name: str):
    """Export the first slice of a pseudo-3d nifti as a 16 bit tiff."""
    volume = sitk.GetArrayFromImage(sitk.ReadImage(nifti_image))
    imsave(output_name, volume[0].astype(np.uint16), compress=6)
232
+
233
+
234
if __name__ == "__main__":
    # NOTE(review): all paths are hard coded to the original author's machine
    base = "/home/fabian/Downloads/Fluo-N2DH-SIM+"
    task_name = 'Fluo-N2DH-SIM'
    spacing = (0.125, 0.125)

    # first build a single-frame task (temporary task id 999)
    task_id = 999
    border_thickness = 0.7
    prepare_task(base, task_id, task_name, spacing, border_thickness)

    # then derive task 89 from it by adding the 4 previous time frames as extra channels
    task_id = 89
    additional_time_steps = 4
    task_name = 'Fluo-N2DH-SIM_thickborder_time'
    full_taskname = 'Task%03.0d_' % task_id + task_name
    output_raw = join(nnUNet_raw_data, full_taskname)
    # NOTE(review): rmtree fails if output_raw does not exist yet - presumably always
    # rerun on an existing folder; verify before first use
    shutil.rmtree(output_raw)
    shutil.copytree(join(nnUNet_raw_data, 'Task999_Fluo-N2DH-SIM_thickborder'), output_raw)

    shutil.rmtree(join(nnUNet_raw_data, 'Task999_Fluo-N2DH-SIM_thickborder'))

    # now add additional time information
    for fld in ['imagesTr', 'imagesTs']:
        curr = join(output_raw, fld)
        for seq in ['01', '02']:
            images = subfiles(curr, prefix=seq, join=False)
            for i in images:
                # file names are <seq>_t<XXX>_0000.nii.gz; the frame of interest
                # becomes the last channel (_0004)
                current_timestep = int(i.split('_')[1][1:])
                renamed = join(curr, i.replace("_0000", "_%04.0d" % additional_time_steps))
                shutil.move(join(curr, i), renamed)
                for previous_timestep in range(-additional_time_steps, 0):
                    # previous time steps will already have been processed and renamed!
                    expected_filename = join(curr, seq + "_t%03.0d" % (
                            current_timestep + previous_timestep) + "_%04.0d" % additional_time_steps + ".nii.gz")
                    if not isfile(expected_filename):
                        # create empty image (frames before the start of the sequence)
                        img = sitk.ReadImage(renamed)
                        empty = sitk.GetImageFromArray(np.zeros_like(sitk.GetArrayFromImage(img)))
                        empty.CopyInformation(img)
                        sitk.WriteImage(empty, join(curr, i.replace("_0000", "_%04.0d" % (
                                additional_time_steps + previous_timestep))))
                    else:
                        shutil.copy(expected_filename, join(curr, i.replace("_0000", "_%04.0d" % (
                                additional_time_steps + previous_timestep))))
    # update the modality section of dataset.json to reflect the 5 time channels
    dataset = load_json(join(output_raw, 'dataset.json'))
    dataset['modality'] = {
        '0': 't_minus 4',
        '1': 't_minus 3',
        '2': 't_minus 2',
        '3': 't_minus 1',
        '4': 'frame of interest',
    }
    save_json(dataset, join(output_raw, 'dataset.json'))

    # we do not need custom splits since we train on all training cases

    # test set predictions are converted to instance seg with convert_folder_to_instanceseg

    # test set predictions are converted to tiff with convert_to_tiff
nnunet/dataset_conversion/Task114_heart_MNMs.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import shutil
16
+ from collections import OrderedDict
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+ from numpy.random.mtrand import RandomState
22
+
23
+ from nnunet.experiment_planning.common_utils import split_4d_nifti
24
+
25
+
26
def get_mnms_data(data_root):
    """Recursively collect all nifti files below *data_root*.

    Files whose name contains '_gt' are treated as ground truth
    segmentations, all other nifti files as raw images.

    :param data_root: directory that is walked recursively
    :return: tuple (list of raw image paths, list of ground truth paths)
    """
    raw_images, gt_segmentations = [], []
    for root, _, filenames in os.walk(data_root):
        for filename in filenames:
            if not filename.endswith('nii.gz'):
                continue
            full_path = os.path.join(root, filename)
            if '_gt' in filename:
                gt_segmentations.append(full_path)
            else:
                raw_images.append(full_path)
    return raw_images, gt_segmentations
38
+
39
+
40
def generate_filename_for_nnunet(pat_id, ts, pat_folder=None, add_zeros=False, vendor=None, centre=None, mode='mnms',
                                 data_format='nii.gz'):
    """Build an nnU-Net style filename for one patient and timestep.

    The timestep is zero-padded to four digits. If both *vendor* and *centre*
    are given they are embedded in the name: before the patient id for
    mode != 'mnms', after the timestep for mode == 'mnms'. With
    *add_zeros* the nnU-Net modality suffix '_0000' is appended.

    :param pat_id: patient identifier
    :param ts: timestep (int or str)
    :param pat_folder: optional folder to prepend to the filename
    :param add_zeros: append the '_0000' modality suffix
    :param vendor: optional vendor tag
    :param centre: optional centre tag
    :param mode: 'mnms' or any other value (changes tag ordering)
    :param data_format: file extension without leading dot
    :return: the generated filename (joined with pat_folder if given)
    """
    timestep = str(ts).zfill(4)
    modality_suffix = '_0000' if add_zeros else ''
    if not vendor or not centre:
        stem = "{}_{}".format(pat_id, timestep)
    elif mode == 'mnms':
        stem = "{}_{}_{}_{}".format(pat_id, timestep, vendor, centre)
    else:
        stem = "{}_{}_{}_{}".format(vendor, centre, pat_id, timestep)
    filename = "{}{}.{}".format(stem, modality_suffix, data_format)
    if pat_folder:
        filename = os.path.join(pat_folder, filename)
    return filename
62
+
63
+
64
def select_annotated_frames_mms(data_folder, out_folder, add_zeros=False, is_gt=False,
                                df_path="/media/full/tera2/data/challenges/mms/Training-corrected_original/M&Ms Dataset Information.xlsx",
                                mode='mnms',):
    """Copy only the annotated ED/ES frames of every training patient.

    Reads the official M&Ms information sheet to find, per patient, the
    end-diastolic (ED) and end-systolic (ES) timesteps plus vendor/centre
    tags, then copies those two frames from *data_folder* to *out_folder*
    under nnU-Net compatible names. Vendor "C" patients (test data) are
    skipped.

    :param data_folder: folder with the split 3d frames
    :param out_folder: destination folder (imagesTr or labelsTr)
    :param add_zeros: append the '_0000' modality suffix to the new name
    :param is_gt: True when copying ground truth ('sa_gt') instead of raw ('sa') frames
    :param df_path: path to 'M&Ms Dataset Information.xlsx'
    :param mode: filename mode, forwarded to generate_filename_for_nnunet
    """
    table = pd.read_excel(df_path, index_col='External code')
    # this step is needed in case of M&Ms data to adjust it to the nnUNet frame work:
    # source files are named <pat>_sa_<ts>.nii.gz / <pat>_sa_gt_<ts>.nii.gz
    suffix = 'sa_gt' if is_gt else 'sa'

    for pat_id in table.index:
        vendor = table.loc[pat_id, 'Vendor']
        if vendor == "C":  # vendor C is for test data
            continue
        centre = table.loc[pat_id, 'Centre']
        for ts in (table.loc[pat_id, 'ED'], table.loc[pat_id, 'ES']):
            # old filename (w/o vendor and centre)
            src = os.path.join(data_folder, "{}_{}_{}.nii.gz".format(pat_id, suffix, str(ts).zfill(4)))
            # new filename with vendor and centre
            dst = generate_filename_for_nnunet(pat_id=pat_id, ts=ts, pat_folder=out_folder,
                                               vendor=vendor, centre=centre, add_zeros=add_zeros, mode=mode)
            shutil.copy(src, dst)
96
+
97
+
98
def create_custom_splits_for_experiments(task_path):
    """Append three custom folds (5, 6, 7) to an existing splits_final.pkl.

    Patients are grouped by vendor (case identifiers containing '_A_' vs
    '_B_'). 80% of each vendor's patients are drawn reproducibly (seed 1234)
    as training patients; the remaining patients of both vendors form the
    validation set used by all three folds:
      fold 5: train on vendor A
      fold 6: train on vendor B
      fold 7: train on vendors A and B
    The first five (standard cross-validation) folds are kept; any previously
    appended custom folds are discarded before the new ones are added.

    :param task_path: preprocessed task folder containing splits_final.pkl
    """
    preprocessed = os.path.join(task_path, "nnUNetData_plans_v2.1_2D_stage0")
    data_keys = [k[:-4] for k in subfiles(preprocessed, join=False, suffix='npz')]
    existing_splits = os.path.join(task_path, "splits_final.pkl")

    splits = load_pickle(existing_splits)[:5]  # discard old changes, keep the 5 standard folds

    patients_a = np.unique([k.split('_')[0] for k in data_keys if k.find('_A_') != -1])
    patients_b = np.unique([k.split('_')[0] for k in data_keys if k.find('_B_') != -1])

    rng = RandomState(1234)
    # draw vendor A first, then vendor B, to reproduce the historic random sequence
    train_idx_a = rng.choice(len(patients_a), int(np.round(0.8 * len(patients_a))), replace=False)
    train_idx_b = rng.choice(len(patients_b), int(np.round(0.8 * len(patients_b))), replace=False)

    train_pat_a = [patients_a[i] for i in train_idx_a]
    train_pat_b = [patients_b[i] for i in train_idx_b]
    val_pat_a = [p for p in patients_a if p not in train_pat_a]
    val_pat_b = [p for p in patients_b if p not in train_pat_b]

    def _keys_of(patients):
        # all preprocessed cases belonging to the given patient identifiers
        return [k for k in data_keys if k.split("_")[0] in patients]

    # fold 5 will be train on a and eval on val sets of a and b
    splits.append({'train': _keys_of(train_pat_a),
                   'val': _keys_of(val_pat_a) + _keys_of(val_pat_b)})
    # fold 6 will be train on b and eval on val sets of a and b
    splits.append({'train': _keys_of(train_pat_b),
                   'val': _keys_of(val_pat_a) + _keys_of(val_pat_b)})
    # fold 7 train on both, eval on both
    splits.append({'train': _keys_of(train_pat_b) + _keys_of(train_pat_a),
                   'val': _keys_of(val_pat_a) + _keys_of(val_pat_b)})
    save_pickle(splits, existing_splits)
141
+
142
+
143
+ if __name__ == "__main__":
144
+ # this script will split 4d data from the M&Ms data set into 3d images for both, raw images and gt annotations.
145
+ # after this script you will be able to start a training on the M&Ms data.
146
+ # use this script as inspiration in case other data than M&Ms data is use for training.
147
+ #
148
+ # check also the comments at the END of the script for instructions on how to run the actual training after this
149
+ # script
150
+ #
151
+
152
+ # define a task ID for your experiment (I have choosen 114)
153
+ task_name = "Task679_heart_mnms"
154
+ # this is where the downloaded data from the M&Ms challenge shall be placed
155
+ raw_data_dir = "/media/full/tera2/data"
156
+ # set path to official ***M&Ms Dataset Information.xlsx*** file
157
+ df_path = "/media/full/tera2/data/challenges/mms/Training-corrected_original/M&Ms Dataset Information.xlsx"
158
+ # don't make changes here
159
+ folder_imagesTr = "imagesTr"
160
+ train_dir = os.path.join(raw_data_dir, task_name, folder_imagesTr)
161
+
162
+ # this is where our your splitted files WITH annotation will be stored. Dont make changes here. Otherwise nnUNet
163
+ # might have problems finding the training data later during the training process
164
+ out_dir = os.path.join(os.environ.get('nnUNet_raw_data_base'), 'nnUNet_raw_data', task_name)
165
+
166
+ files_raw, files_gt = get_mnms_data(data_root=train_dir)
167
+
168
+ filesTs, _ = get_mnms_data(data_root=train_dir)
169
+
170
+ split_path_raw_all_ts = os.path.join(raw_data_dir, task_name, "splitted_all_timesteps", folder_imagesTr,
171
+ "split_raw_images")
172
+ split_path_gt_all_ts = os.path.join(raw_data_dir, task_name, "splitted_all_timesteps", folder_imagesTr,
173
+ "split_annotation")
174
+ maybe_mkdir_p(split_path_raw_all_ts)
175
+ maybe_mkdir_p(split_path_gt_all_ts)
176
+
177
+ # for fast splitting of many patients use the following lines
178
+ # however keep in mind that these lines cause problems for some users.
179
+ # If problems occur use the code for loops below
180
+ # print("splitting raw 4d images into 3d images")
181
+ # split_4d_for_all_pat(files_raw, split_path_raw)
182
+ # print("splitting ground truth 4d into 3d files")
183
+ # split_4d_for_all_pat(files_gt, split_path_gt_all_ts)
184
+
185
+ print("splitting raw 4d images into 3d images")
186
+ for f in files_raw:
187
+ print("splitting {}".format(f))
188
+ split_4d_nifti(f, split_path_raw_all_ts)
189
+ print("splitting ground truth 4d into 3d files")
190
+ for gt in files_gt:
191
+ split_4d_nifti(gt, split_path_gt_all_ts)
192
+ print("splitting {}".format(gt))
193
+
194
+ print("prepared data will be saved at: {}".format(out_dir))
195
+ maybe_mkdir_p(join(out_dir, "imagesTr"))
196
+ maybe_mkdir_p(join(out_dir, "labelsTr"))
197
+
198
+ imagesTr_path = os.path.join(out_dir, "imagesTr")
199
+ labelsTr_path = os.path.join(out_dir, "labelsTr")
200
+ # only a small fraction of all timestep in the cardiac cycle possess gt annotation. These timestep will now be
201
+ # selected
202
+ select_annotated_frames_mms(split_path_raw_all_ts, imagesTr_path, add_zeros=True, is_gt=False, df_path=df_path)
203
+ select_annotated_frames_mms(split_path_gt_all_ts, labelsTr_path, add_zeros=False, is_gt=True, df_path=df_path)
204
+
205
+ labelsTr = subfiles(labelsTr_path)
206
+
207
+ # create a json file that will be needed by nnUNet to initiate the preprocessing process
208
+ json_dict = OrderedDict()
209
+ json_dict['name'] = "M&Ms"
210
+ json_dict['description'] = "short axis cardiac cine MRI segmentation"
211
+ json_dict['tensorImageSize'] = "4D"
212
+ json_dict['reference'] = "Campello, Victor M et al. “Multi-Centre, Multi-Vendor and Multi-Disease Cardiac " \
213
+ "Segmentation: The M&Ms Challenge.” IEEE transactions on " \
214
+ "medical imaging vol. 40,12 (2021): 3543-3554. doi:10.1109/TMI.2021.3090082"
215
+ json_dict['licence'] = "see M&Ms challenge"
216
+ json_dict['release'] = "0.0"
217
+ json_dict['modality'] = {
218
+ "0": "MRI",
219
+ }
220
+ # labels differ for ACDC challenge
221
+ json_dict['labels'] = {
222
+ "0": "background",
223
+ "1": "LVBP",
224
+ "2": "LVM",
225
+ "3": "RV"
226
+ }
227
+ json_dict['numTraining'] = len(labelsTr)
228
+ json_dict['numTest'] = 0
229
+ json_dict['training'] = [{'image': "./imagesTr/%s" % i.split("/")[-1],
230
+ "label": "./labelsTr/%s" % i.split("/")[-1]} for i in labelsTr]
231
+ json_dict['test'] = []
232
+
233
+ save_json(json_dict, os.path.join(out_dir, "dataset.json"))
234
+
235
+ #
236
+ # now the data is ready to be preprocessed by the nnUNet
237
+ # the following steps are only needed if you want to reproduce the exact results from the MMS challenge
238
+ #
239
+
240
+
241
+ # then preprocess data and plan training.
242
+ # run in terminal
243
+ # nnUNet_plan_and_preprocess -t 114 --verify_dataset_integrity # for 2d
244
+ # nnUNet_plan_and_preprocess -t 114 --verify_dataset_integrity -pl3d ExperimentPlannerTargetSpacingForAnisoAxis # for 3d
245
+
246
+ # start training and stop it immediately to get a split.pkl file
247
+ # nnUNet_train 2d nnUNetTrainerV2_MMS 114 0
248
+
249
+ #
250
+ # then create custom splits as used for the final M&Ms submission
251
+ #
252
+
253
+ # in this file comment everything except for the following line
254
+ # create_custom_splits_for_experiments(out_dir)
255
+
256
+ # then start training with
257
+ #
258
+ # nnUNet_train 3d_fullres nnUNetTrainerV2_MMS Task114_heart_mnms -p nnUNetPlanstargetSpacingForAnisoAxis 0 # for 3d and fold 0
259
+ # and
260
+ # nnUNet_train 2d nnUNetTrainerV2_MMS Task114_heart_mnms 0 # for 2d and fold 0
261
+
262
+
nnunet/dataset_conversion/Task115_COVIDSegChallenge.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import shutil
15
+ import subprocess
16
+
17
+ import SimpleITK as sitk
18
+ import numpy as np
19
+ from batchgenerators.utilities.file_and_folder_operations import *
20
+
21
+ from nnunet.dataset_conversion.utils import generate_dataset_json
22
+ from nnunet.paths import nnUNet_raw_data
23
+ from nnunet.paths import preprocessing_output_dir
24
+ from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name
25
+
26
+
27
def increase_batch_size(plans_file: str, save_as: str, bs_factor: int):
    """Scale the batch size of every stage in an nnU-Net plans file.

    Loads *plans_file*, multiplies the 'batch_size' entry of each stage in
    'plans_per_stage' by *bs_factor* and writes the modified plans to *save_as*.

    :param plans_file: path to the input plans pickle
    :param save_as: path the modified plans are written to
    :param bs_factor: integer multiplier for the batch size
    """
    plans = load_pickle(plans_file)
    for stage_plan in plans['plans_per_stage'].values():
        stage_plan['batch_size'] *= bs_factor
    save_pickle(plans, save_as)
33
+
34
+
35
def prepare_submission(folder_in, folder_out):
    """Copy challenge predictions into a submission folder under their case id.

    Input files are expected to be named like 'volume-covid19-A-<id>_ct.nii.gz';
    each is copied to *folder_out* as '<id>.nii.gz'.

    :param folder_in: folder with the predicted segmentations
    :param folder_out: submission folder (created if necessary)
    """
    predictions = subfiles(folder_in, suffix='.gz', join=False)
    maybe_mkdir_p(folder_out)
    for pred in predictions:
        # the case id is the part after the last '-' minus the '_ct.nii.gz' tail
        case_id = pred.split('-')[-1][:-10]
        shutil.copy(join(folder_in, pred), join(folder_out, case_id + '.nii.gz'))
41
+
42
+
43
def get_ids_from_folder(folder):
    """Return the case ids of all '*_ct.nii.gz' files in *folder*.

    The id is the portion after the last '-' with the trailing '_ct.nii.gz'
    removed (files are named 'volume-covid19-A-<id>_ct.nii.gz').

    :param folder: folder to scan
    :return: list of case id strings
    """
    return [c.split('-')[-1][:-10] for c in subfiles(folder, suffix='_ct.nii.gz', join=False)]
49
+
50
+
51
def postprocess_submission(folder_ct, folder_pred, folder_postprocessed, bbox_distance_to_seg_in_cm=7.5):
    """
    segment with lung mask, get bbox from that, use bbox to remove predictions in background

    WE EXPERIMENTED WITH THAT ON THE VALIDATION SET AND FOUND THAT IT DOESN'T DO ANYTHING. NOT USED FOR TEST SET

    :param folder_ct: folder with the raw CT images ('*_ct.nii.gz'); lung masks are written here as well
    :param folder_pred: folder with the predicted segmentations
    :param folder_postprocessed: output folder for the cropped segmentations (created if necessary)
    :param bbox_distance_to_seg_in_cm: margin added around the lung bounding box before cropping.
        NOTE(review): SimpleITK spacings are in mm, so dividing this 'cm' value by the spacing treats
        it as mm (7.5/0.74 ~ 10 voxels ~ 7.5 mm) — confirm the intended unit
    """
    # pip install git+https://github.com/JoHof/lungmask
    cts = subfiles(folder_ct, suffix='_ct.nii.gz', join=False)
    output_files = [i[:-10] + '_lungmask.nii.gz' for i in cts]

    # run lungmask on everything; existing lung masks are reused
    for i, o in zip(cts, output_files):
        if not isfile(join(folder_ct, o)):
            subprocess.call(['lungmask', join(folder_ct, i), join(folder_ct, o), '--modelname', 'R231CovidWeb'])

    if not isdir(folder_postprocessed):
        maybe_mkdir_p(folder_postprocessed)

    ids = get_ids_from_folder(folder_ct)
    for i in ids:
        # find lungmask
        lungmask_file = join(folder_ct, 'volume-covid19-A-' + i + '_lungmask.nii.gz')
        if not isfile(lungmask_file):
            raise RuntimeError('missing lung')
        seg_file = join(folder_pred, 'volume-covid19-A-' + i + '_ct.nii.gz')
        if not isfile(seg_file):
            raise RuntimeError('missing seg')

        lung_mask = sitk.GetArrayFromImage(sitk.ReadImage(lungmask_file))
        seg_itk = sitk.ReadImage(seg_file)
        seg = sitk.GetArrayFromImage(seg_itk)

        # tight bounding box around all nonzero lung-mask voxels, per axis
        where = np.argwhere(lung_mask != 0)
        bbox = [
            [min(where[:, 0]), max(where[:, 0])],
            [min(where[:, 1]), max(where[:, 1])],
            [min(where[:, 2]), max(where[:, 2])],
        ]

        # GetSpacing is (x, y, z); reverse to match the (z, y, x) numpy array layout
        spacing = np.array(seg_itk.GetSpacing())[::-1]
        # print(bbox)
        for dim in range(3):
            sp = spacing[dim]
            # extend the bbox by the requested physical margin (at least one voxel), clipped to the image
            voxels_extend = max(int(np.ceil(bbox_distance_to_seg_in_cm / sp)), 1)
            bbox[dim][0] = max(0, bbox[dim][0] - voxels_extend)
            bbox[dim][1] = min(seg.shape[dim], bbox[dim][1] + voxels_extend)
        # print(bbox)

        # zero every prediction voxel outside the extended bounding box
        seg_old = np.copy(seg)
        seg[0:bbox[0][0], :, :] = 0
        seg[bbox[0][1]:, :, :] = 0
        seg[:, 0:bbox[1][0], :] = 0
        seg[:, bbox[1][1]:, :] = 0
        seg[:, :, 0:bbox[2][0]] = 0
        seg[:, :, bbox[2][1]:] = 0
        if np.any(seg_old != seg):
            # report a handful of changed voxel coordinates for manual inspection
            print('changed seg', i)
            argwhere = np.argwhere(seg != seg_old)
            print(argwhere[np.random.choice(len(argwhere), 10)])

        seg_corr = sitk.GetImageFromArray(seg)
        seg_corr.CopyInformation(seg_itk)
        sitk.WriteImage(seg_corr, join(folder_postprocessed, 'volume-covid19-A-' + i + '_ct.nii.gz'))
114
+
115
+
116
def manually_set_configurations():
    """
    ALSO NOT USED!

    Hand-edits the nnU-Net plans files of task 115 to create custom
    configurations: a larger patch size for the 3d_fullres U-Net, 3x/5x
    batch size variants, and a larger patch size for the residual U-Net.
    Each modified plan is saved under a new name next to the original.
    :return:
    """
    task115_dir = join(preprocessing_output_dir, convert_id_to_task_name(115))

    ## larger patch size

    # task115 3d_fullres default is:
    """
    {'batch_size': 2,
    'num_pool_per_axis': [2, 6, 6],
    'patch_size': array([ 28, 256, 256]),
    'median_patient_size_in_voxels': array([ 62, 512, 512]),
    'current_spacing': array([5. , 0.74199998, 0.74199998]),
    'original_spacing': array([5. , 0.74199998, 0.74199998]),
    'do_dummy_2D_data_aug': True,
    'pool_op_kernel_sizes': [[1, 2, 2], [1, 2, 2], [2, 2, 2], [2, 2, 2], [1, 2, 2], [1, 2, 2]],
    'conv_kernel_sizes': [[1, 3, 3], [1, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3]]}
    """
    plans = load_pickle(join(task115_dir, 'nnUNetPlansv2.1_plans_3D.pkl'))
    # stage 1 is the full resolution stage; enlarge its patch and add a pooling step along z
    fullres_stage = plans['plans_per_stage'][1]
    fullres_stage['patch_size'] = np.array([ 64, 320, 320])
    fullres_stage['num_pool_per_axis'] = [4, 6, 6]
    fullres_stage['pool_op_kernel_sizes'] = [[1, 2, 2],
                                             [1, 2, 2],
                                             [2, 2, 2],
                                             [2, 2, 2],
                                             [2, 2, 2],
                                             [2, 2, 2]]
    fullres_stage['conv_kernel_sizes'] = [[1, 3, 3],
                                          [1, 3, 3],
                                          [3, 3, 3],
                                          [3, 3, 3],
                                          [3, 3, 3],
                                          [3, 3, 3],
                                          [3, 3, 3]]

    save_pickle(plans, join(task115_dir, 'nnUNetPlansv2.1_custom_plans_3D.pkl'))

    ## larger batch size
    # (default for all 3d trainings is batch size 2)
    increase_batch_size(join(task115_dir, 'nnUNetPlansv2.1_plans_3D.pkl'), join(task115_dir, 'nnUNetPlansv2.1_bs3x_plans_3D.pkl'), 3)
    increase_batch_size(join(task115_dir, 'nnUNetPlansv2.1_plans_3D.pkl'), join(task115_dir, 'nnUNetPlansv2.1_bs5x_plans_3D.pkl'), 5)

    # residual unet
    """
    default is:
    Out[7]:
    {'batch_size': 2,
    'num_pool_per_axis': [2, 6, 5],
    'patch_size': array([ 28, 256, 224]),
    'median_patient_size_in_voxels': array([ 62, 512, 512]),
    'current_spacing': array([5. , 0.74199998, 0.74199998]),
    'original_spacing': array([5. , 0.74199998, 0.74199998]),
    'do_dummy_2D_data_aug': True,
    'pool_op_kernel_sizes': [[1, 1, 1],
    [1, 2, 2],
    [1, 2, 2],
    [2, 2, 2],
    [2, 2, 2],
    [1, 2, 2],
    [1, 2, 1]],
    'conv_kernel_sizes': [[1, 3, 3],
    [1, 3, 3],
    [3, 3, 3],
    [3, 3, 3],
    [3, 3, 3],
    [3, 3, 3],
    [3, 3, 3]],
    'num_blocks_encoder': (1, 2, 3, 4, 4, 4, 4),
    'num_blocks_decoder': (1, 1, 1, 1, 1, 1)}
    """
    plans = load_pickle(join(task115_dir, 'nnUNetPlans_FabiansResUNet_v2.1_plans_3D.pkl'))
    fullres_stage = plans['plans_per_stage'][1]
    fullres_stage['patch_size'] = np.array([ 56, 256, 256])
    fullres_stage['num_pool_per_axis'] = [3, 6, 6]
    fullres_stage['pool_op_kernel_sizes'] = [[1, 1, 1],
                                             [1, 2, 2],
                                             [1, 2, 2],
                                             [2, 2, 2],
                                             [2, 2, 2],
                                             [2, 2, 2],
                                             [1, 2, 2]]
    fullres_stage['conv_kernel_sizes'] = [[1, 3, 3],
                                          [1, 3, 3],
                                          [3, 3, 3],
                                          [3, 3, 3],
                                          [3, 3, 3],
                                          [3, 3, 3],
                                          [3, 3, 3]]
    save_pickle(plans, join(task115_dir, 'nnUNetPlans_FabiansResUNet_v2.1_custom_plans_3D.pkl'))
209
+
210
+
211
def check_same(img1: str, img2: str):
    """
    Compare two images voxel-wise (used for checking initial vs corrected dataset).

    Differences in shape or content are printed to stdout.

    :param img1: path to the first image
    :param img2: path to the second image
    :return: True if both images have identical shape and content, else False
    """
    arr1 = sitk.GetArrayFromImage(sitk.ReadImage(img1))
    arr2 = sitk.GetArrayFromImage(sitk.ReadImage(img2))
    # bugfix: comparing shapes via zip() silently ignored a differing number of
    # dimensions (zip stops at the shorter tuple); tuple comparison catches that too
    if arr1.shape != arr2.shape:
        print('shape')
        return False
    if np.all(arr1 == arr2):
        return True
    diffs = np.argwhere(arr1 != arr2)
    print('content in', diffs.shape[0], 'voxels')
    print('random disagreements:')
    # show up to 3 randomly chosen disagreeing voxel coordinates
    print(diffs[np.random.choice(len(diffs), min(3, diffs.shape[0]), replace=False)])
    return False
232
+
233
+
234
def check_dataset_same(dataset_old='/home/fabian/Downloads/COVID-19-20/Train',
                       dataset_new='/home/fabian/data/COVID-19-20_officialCorrected/COVID-19-20_v2/Train'):
    """
    Verify that the officially corrected dataset matches the locally corrected one.

    For every case in *dataset_new*, the matching image and segmentation in
    *dataset_old* are located (several naming variants exist due to the manual
    correction history) and compared voxel-wise via check_same. Mismatches are
    printed; nothing is returned.

    :param dataset_old: folder with the original / locally corrected data
    :param dataset_new: folder with the officially corrected data
    :return:
    """
    cases = [i[:-10] for i in subfiles(dataset_new, suffix='_ct.nii.gz', join=False)]
    for c in cases:
        data_file = join(dataset_old, c + '_ct_corrDouble.nii.gz')
        corrected_double = False
        if not isfile(data_file):
            data_file = join(dataset_old, c + '_ct.nii.gz')
        else:
            corrected_double = True
        data_file_new = join(dataset_new, c + '_ct.nii.gz')

        same = check_same(data_file, data_file_new)
        if not same: print('data differs in case', c, '\n')

        # segmentations exist under several names depending on how (and how often) they were corrected;
        # try them from most to least specific.
        # bugfix: the consistency checks below used 'assert ~corrected_double'. '~' is bitwise NOT, and
        # ~False == -1 / ~True == -2 are both truthy, so those asserts could never fire. They now use
        # logical negation (and 'assert corrected_double' for the corrDouble seg variant, whose presence
        # implies the CT was double-corrected as well).
        seg_file = join(dataset_old, c + '_seg_corrDouble_corrected.nii.gz')
        if not isfile(seg_file):
            seg_file = join(dataset_old, c + '_seg_corrected_auto.nii.gz')
            if isfile(seg_file):
                assert not corrected_double
            else:
                seg_file = join(dataset_old, c + '_seg_corrected.nii.gz')
                if isfile(seg_file):
                    assert not corrected_double
                else:
                    seg_file = join(dataset_old, c + '_seg_corrDouble.nii.gz')
                    if isfile(seg_file):
                        assert corrected_double
                    else:
                        seg_file = join(dataset_old, c + '_seg.nii.gz')
        seg_file_new = join(dataset_new, c + '_seg.nii.gz')
        same = check_same(seg_file, seg_file_new)
        if not same: print('seg differs in case', c, '\n')
272
+
273
+
274
if __name__ == '__main__':
    # this is the folder containing the data as downloaded from https://covid-segmentation.grand-challenge.org/COVID-19-20/
    # (zip file was decompressed!)
    downloaded_data_dir = '/home/fabian/data/COVID-19-20_officialCorrected/COVID-19-20_v2/'

    task_name = "Task115_COVIDSegChallenge"

    # set up the nnU-Net raw data folder structure for this task
    target_base = join(nnUNet_raw_data, task_name)

    target_imagesTr = join(target_base, "imagesTr")
    target_imagesVal = join(target_base, "imagesVal")
    target_labelsTr = join(target_base, "labelsTr")

    maybe_mkdir_p(target_imagesTr)
    maybe_mkdir_p(target_imagesVal)
    maybe_mkdir_p(target_labelsTr)

    train_orig = join(downloaded_data_dir, "Train")

    # convert training set
    cases = [i[:-10] for i in subfiles(train_orig, suffix='_ct.nii.gz', join=False)]
    for c in cases:
        data_file = join(train_orig, c+'_ct.nii.gz')

        # before there was the official corrected dataset we did some corrections of our own. These corrections were
        # dropped when the official dataset was revised.
        seg_file = join(train_orig, c + '_seg_corrected.nii.gz')
        if not isfile(seg_file):
            seg_file = join(train_orig, c + '_seg.nii.gz')

        shutil.copy(data_file, join(target_imagesTr, c + "_0000.nii.gz"))
        shutil.copy(seg_file, join(target_labelsTr, c + '.nii.gz'))

    # the challenge validation set has no labels; only the images are exported (for prediction)
    val_orig = join(downloaded_data_dir, "Validation")
    cases = [i[:-10] for i in subfiles(val_orig, suffix='_ct.nii.gz', join=False)]
    for c in cases:
        data_file = join(val_orig, c + '_ct.nii.gz')

        shutil.copy(data_file, join(target_imagesVal, c + "_0000.nii.gz"))

    generate_dataset_json(
        join(target_base, 'dataset.json'),
        target_imagesTr,
        None,
        ("CT", ),
        {0: 'background', 1: 'covid'},
        task_name,
        dataset_reference='https://covid-segmentation.grand-challenge.org/COVID-19-20/'
    )

    # performance summary (train set 5-fold cross-validation)

    # baselines
    # 3d_fullres nnUNetTrainerV2__nnUNetPlans_v2.1 0.7441
    # 3d_lowres nnUNetTrainerV2__nnUNetPlans_v2.1 0.745

    # models used for test set prediction
    # 3d_fullres nnUNetTrainerV2_ResencUNet_DA3__nnUNetPlans_FabiansResUNet_v2.1 0.7543
    # 3d_fullres nnUNetTrainerV2_ResencUNet__nnUNetPlans_FabiansResUNet_v2.1 0.7527
    # 3d_lowres nnUNetTrainerV2_ResencUNet_DA3_BN__nnUNetPlans_FabiansResUNet_v2.1 0.7513
    # 3d_fullres nnUNetTrainerV2_DA3_BN__nnUNetPlans_v2.1 0.7498
    # 3d_fullres nnUNetTrainerV2_DA3__nnUNetPlans_v2.1 0.7532

    # Test set prediction
    # nnUNet_predict -i COVID-19-20_TestSet -o covid_testset_predictions/3d_fullres/nnUNetTrainerV2_ResencUNet_DA3__nnUNetPlans_FabiansResUNet_v2.1 -tr nnUNetTrainerV2_ResencUNet_DA3 -p nnUNetPlans_FabiansResUNet_v2.1 -m 3d_fullres -f 0 1 2 3 4 5 6 7 8 9 -t 115 -z
    # nnUNet_predict -i COVID-19-20_TestSet -o covid_testset_predictions/3d_fullres/nnUNetTrainerV2_ResencUNet__nnUNetPlans_FabiansResUNet_v2.1 -tr nnUNetTrainerV2_ResencUNet -p nnUNetPlans_FabiansResUNet_v2.1 -m 3d_fullres -f 0 1 2 3 4 5 6 7 8 9 -t 115 -z
    # nnUNet_predict -i COVID-19-20_TestSet -o covid_testset_predictions/3d_lowres/nnUNetTrainerV2_ResencUNet_DA3_BN__nnUNetPlans_FabiansResUNet_v2.1 -tr nnUNetTrainerV2_ResencUNet_DA3_BN -p nnUNetPlans_FabiansResUNet_v2.1 -m 3d_lowres -f 0 1 2 3 4 5 6 7 8 9 -t 115 -z
    # nnUNet_predict -i COVID-19-20_TestSet -o covid_testset_predictions/3d_fullres/nnUNetTrainerV2_DA3_BN__nnUNetPlans_v2.1 -tr nnUNetTrainerV2_DA3_BN -m 3d_fullres -f 0 1 2 3 4 5 6 7 8 9 -t 115 -z
    # nnUNet_predict -i COVID-19-20_TestSet -o covid_testset_predictions/3d_fullres/nnUNetTrainerV2_DA3__nnUNetPlans_v2.1 -tr nnUNetTrainerV2_DA3 -m 3d_fullres -f 0 1 2 3 4 5 6 7 8 9 -t 115 -z

    # nnUNet_ensemble -f 3d_lowres/nnUNetTrainerV2_ResencUNet_DA3_BN__nnUNetPlans_FabiansResUNet_v2.1/ 3d_fullres/nnUNetTrainerV2_ResencUNet__nnUNetPlans_FabiansResUNet_v2.1/ 3d_fullres/nnUNetTrainerV2_ResencUNet_DA3__nnUNetPlans_FabiansResUNet_v2.1/ 3d_fullres/nnUNetTrainerV2_DA3_BN__nnUNetPlans_v2.1/ 3d_fullres/nnUNetTrainerV2_DA3__nnUNetPlans_v2.1/ -o ensembled
nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from batchgenerators.utilities.file_and_folder_operations import *
3
+ from nnunet.dataset_conversion.utils import generate_dataset_json
4
+ from nnunet.paths import nnUNet_raw_data, preprocessing_output_dir
5
+ from nnunet.utilities.file_conversions import convert_2d_image_to_nifti
6
+
7
if __name__ == '__main__':
    """
    nnU-Net was originally built for 3D images. It is also strongest when applied to 3D segmentation problems because a
    large proportion of its design choices were built with 3D in mind. Also note that many 2D segmentation problems,
    especially in the non-biomedical domain, may benefit from pretrained network architectures which nnU-Net does not
    support.
    Still, there is certainly a need for an out of the box segmentation solution for 2D segmentation problems. And
    also on 2D segmentation tasks nnU-Net cam perform extremely well! We have, for example, won a 2D task in the cell
    tracking challenge with nnU-Net (see our Nature Methods paper) and we have also successfully applied nnU-Net to
    histopathological segmentation problems.
    Working with 2D data in nnU-Net requires a small workaround in the creation of the dataset. Essentially, all images
    must be converted to pseudo 3D images (so an image with shape (X, Y) needs to be converted to an image with shape
    (1, X, Y). The resulting image must be saved in nifti format. Hereby it is important to set the spacing of the
    first axis (the one with shape 1) to a value larger than the others. If you are working with niftis anyways, then
    doing this should be easy for you. This example here is intended for demonstrating how nnU-Net can be used with
    'regular' 2D images. We selected the massachusetts road segmentation dataset for this because it can be obtained
    easily, it comes with a good amount of training cases but is still not too large to be difficult to handle.
    """

    # download dataset from https://www.kaggle.com/insaff/massachusetts-roads-dataset
    # extract the zip file, then set the following path according to your system:
    base = '/media/fabian/data/road_segmentation_ideal'
    # this folder should have the training and testing subfolders

    # now start the conversion to nnU-Net:
    task_name = 'Task120_MassRoadsSeg'
    target_base = join(nnUNet_raw_data, task_name)
    target_imagesTr = join(target_base, "imagesTr")
    target_imagesTs = join(target_base, "imagesTs")
    target_labelsTs = join(target_base, "labelsTs")
    target_labelsTr = join(target_base, "labelsTr")

    maybe_mkdir_p(target_imagesTr)
    maybe_mkdir_p(target_labelsTs)
    maybe_mkdir_p(target_imagesTs)
    maybe_mkdir_p(target_labelsTr)

    # convert the training examples. Not all training images have labels, so we just take the cases for which there are
    # labels
    labels_dir_tr = join(base, 'training', 'output')
    images_dir_tr = join(base, 'training', 'input')
    training_cases = subfiles(labels_dir_tr, suffix='.png', join=False)
    for t in training_cases:
        unique_name = t[:-4]  # just the filename with the extension cropped away, so img-2.png becomes img-2 as unique_name
        input_segmentation_file = join(labels_dir_tr, t)
        input_image_file = join(images_dir_tr, t)

        output_image_file = join(target_imagesTr, unique_name)  # do not specify a file ending! This will be done for you
        output_seg_file = join(target_labelsTr, unique_name)  # do not specify a file ending! This will be done for you

        # this utility will convert 2d images that can be read by skimage.io.imread to nifti. You don't need to do anything.
        # if this throws an error for your images, please just look at the code for this function and adapt it to your needs
        convert_2d_image_to_nifti(input_image_file, output_image_file, is_seg=False)

        # the labels are stored as 0: background, 255: road. We need to convert the 255 to 1 because nnU-Net expects
        # the labels to be consecutive integers. This can be achieved with setting a transform
        convert_2d_image_to_nifti(input_segmentation_file, output_seg_file, is_seg=True,
                                  transform=lambda x: (x == 255).astype(int))

    # now do the same for the test set
    labels_dir_ts = join(base, 'testing', 'output')
    images_dir_ts = join(base, 'testing', 'input')
    testing_cases = subfiles(labels_dir_ts, suffix='.png', join=False)
    for ts in testing_cases:
        unique_name = ts[:-4]
        input_segmentation_file = join(labels_dir_ts, ts)
        input_image_file = join(images_dir_ts, ts)

        output_image_file = join(target_imagesTs, unique_name)
        output_seg_file = join(target_labelsTs, unique_name)

        convert_2d_image_to_nifti(input_image_file, output_image_file, is_seg=False)
        convert_2d_image_to_nifti(input_segmentation_file, output_seg_file, is_seg=True,
                                  transform=lambda x: (x == 255).astype(int))

    # finally we can call the utility for generating a dataset.json
    generate_dataset_json(join(target_base, 'dataset.json'), target_imagesTr, target_imagesTs, ('Red', 'Green', 'Blue'),
                          labels={0: 'background', 1: 'street'}, dataset_name=task_name, license='hands off!')

    """
    once this is completed, you can use the dataset like any other nnU-Net dataset. Note that since this is a 2D
    dataset there is no need to run preprocessing for 3D U-Nets. You should therefore run the
    `nnUNet_plan_and_preprocess` command like this:

    > nnUNet_plan_and_preprocess -t 120 -pl3d None

    once that is completed, you can run the trainings as follows:
    > nnUNet_train 2d nnUNetTrainerV2 120 FOLD

    (where fold is again 0, 1, 2, 3 and 4 - 5-fold cross validation)

    there is no need to run nnUNet_find_best_configuration because there is only one model to choose from.
    Note that without running nnUNet_find_best_configuration, nnU-Net will not have determined a postprocessing
    for the whole cross-validation. Spoiler: it will determine not to run postprocessing anyways. If you are using
    a different 2D dataset, you can make nnU-Net determine the postprocessing by using the
    `nnUNet_determine_postprocessing` command
    """
nnunet/dataset_conversion/Task135_KiTS2021.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from batchgenerators.utilities.file_and_folder_operations import *
2
+ import shutil
3
+
4
+ from nnunet.paths import nnUNet_raw_data
5
+ from nnunet.dataset_conversion.utils import generate_dataset_json
6
+
7
if __name__ == '__main__':
    # Convert the KiTS2021 data (kits21 github repository layout) into the
    # nnU-Net raw-data format: one image per case plus the majority-voted
    # segmentation as ground truth.
    # this is the data folder from the kits21 github repository, see https://github.com/neheller/kits21
    kits_data_dir = '/home/fabian/git_repos/kits21/kits21/data'

    # This script uses the majority voted segmentation as ground truth
    kits_segmentation_filename = 'aggregated_MAJ_seg.nii.gz'

    # Arbitrary task id. This is just to ensure each dataset has a unique number. Set this to whatever ([0-999]) you
    # want
    task_id = 135
    task_name = "KiTS2021"
    foldername = "Task%03.0d_%s" % (task_id, task_name)

    # setting up nnU-Net folders
    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    labelstr = join(out_base, "labelsTr")
    for required_folder in (imagestr, labelstr):
        maybe_mkdir_p(required_folder)

    # copy every case that has the aggregated segmentation available
    for case_id in subdirs(kits_data_dir, prefix='case_', join=False):
        seg_source = join(kits_data_dir, case_id, kits_segmentation_filename)
        if isfile(seg_source):
            shutil.copy(seg_source, join(labelstr, case_id + '.nii.gz'))
            shutil.copy(join(kits_data_dir, case_id, 'imaging.nii.gz'),
                        join(imagestr, case_id + '_0000.nii.gz'))

    generate_dataset_json(join(out_base, 'dataset.json'),
                          imagestr,
                          None,
                          ('CT',),
                          {
                              0: 'background',
                              1: "kidney",
                              2: "tumor",
                              3: "cyst",
                          },
                          task_name,
                          license='see https://kits21.kits-challenge.org/participate#download-block',
                          dataset_description='see https://kits21.kits-challenge.org/',
                          dataset_reference='https://www.sciencedirect.com/science/article/abs/pii/S1361841520301857, '
                                            'https://kits21.kits-challenge.org/',
                          dataset_release='0')
nnunet/dataset_conversion/Task154_RibFrac_multi_label.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import SimpleITK as sitk
2
+ from natsort import natsorted
3
+ import numpy as np
4
+ from pathlib import Path
5
+ import pandas as pd
6
+ from collections import defaultdict
7
+ from shutil import copyfile
8
+ import os
9
+ from os.path import join
10
+ from tqdm import tqdm
11
+ import gc
12
+ import multiprocessing as mp
13
+ from nnunet.dataset_conversion.utils import generate_dataset_json
14
+ from functools import partial
15
+
16
+
17
def preprocess_dataset(dataset_load_path, dataset_save_path, pool):
    """Convert the raw RibFrac download into nnU-Net's raw-data layout.

    Reads images, masks and the three info CSVs from dataset_load_path and
    writes converted training/test data below dataset_save_path. The given
    multiprocessing pool parallelizes the per-case training conversion.
    """
    train_image_load_path = join(dataset_load_path, "imagesTr")
    train_mask_load_path = join(dataset_load_path, "labelsTr")
    test_image_load_path = join(dataset_load_path, "imagesTs")

    ribfrac_train_info_1_path = join(dataset_load_path, "ribfrac-train-info-1.csv")
    ribfrac_train_info_2_path = join(dataset_load_path, "ribfrac-train-info-2.csv")
    ribfrac_val_info_path = join(dataset_load_path, "ribfrac-val-info.csv")

    # create the output folder structure up front
    for output_subdir in ("imagesTr", "labelsTr", "imagesTs"):
        Path(join(dataset_save_path, output_subdir)).mkdir(parents=True, exist_ok=True)

    meta_data = preprocess_csv(ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path)
    preprocess_train(train_image_load_path, train_mask_load_path, meta_data, dataset_save_path, pool)
    preprocess_test(test_image_load_path, dataset_save_path)
36
+
37
+
38
def preprocess_csv(ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path):
    """Collect per-case fracture metadata from the three RibFrac info CSVs.

    Each CSV row associates a case (public_id) with one annotated fracture
    instance (label_id) and its fracture class (label_code).

    :return: defaultdict mapping public_id -> list of
        {"instance": label_id, "class_label": label_code}
    """
    print("Processing csv...")
    meta_data = defaultdict(list)
    for csv_path in [ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path]:
        df = pd.read_csv(csv_path)
        # iterate the columns directly instead of DataFrame.iterrows: avoids
        # constructing a Series per row (much faster) and drops the unused index
        for name, instance, class_label in zip(df["public_id"], df["label_id"], df["label_code"]):
            meta_data[name].append({"instance": instance, "class_label": class_label})
    print("Finished csv processing.")
    return meta_data
50
+
51
+
52
def preprocess_train(image_path, mask_path, meta_data, save_path, pool):
    """Convert all training cases in parallel using the multiprocessing pool."""
    print("Processing train data...")
    worker = partial(preprocess_train_single, image_path=image_path,
                     mask_path=mask_path, meta_data=meta_data, save_path=save_path)
    pool.map(worker, meta_data.keys())
    print("Finished processing train data.")
56
+
57
+
58
def preprocess_train_single(name, image_path, mask_path, meta_data, save_path):
    """Convert one training case: map the instance segmentation to semantic
    fracture classes and save image + mask under the nnU-Net naming scheme."""
    case_id = int(name[7:])  # strip the 'RibFrac' prefix
    image, _, _, _ = load_image(join(image_path, name + "-image.nii.gz"), return_meta=True, is_seg=False)
    instance_seg_mask, spacing, _, _ = load_image(join(mask_path, name + "-label.nii.gz"), return_meta=True, is_seg=True)
    semantic_seg_mask = np.zeros_like(instance_seg_mask, dtype=int)
    for fracture in meta_data[name]:
        semantic_seg_mask[instance_seg_mask == fracture["instance"]] = fracture["class_label"]
    semantic_seg_mask[semantic_seg_mask == -1] = 5  # Set ignore label to 5
    stem = "RibFrac_" + str(case_id).zfill(4)
    save_image(join(save_path, "imagesTr/" + stem + "_0000.nii.gz"), image, spacing=spacing, is_seg=False)
    save_image(join(save_path, "labelsTr/" + stem + ".nii.gz"), semantic_seg_mask, spacing=spacing, is_seg=True)
68
+
69
+
70
def preprocess_test(load_test_image_dir, save_path):
    """Copy the test images (no labels exist) into imagesTs with nnU-Net names."""
    print("Processing test data...")
    for filename in tqdm(load_filenames(load_test_image_dir)):
        # basename looks like RibFracXXX-image.nii.gz -> XXX is the case id
        case_id = int(os.path.basename(filename)[8:-13])
        target_name = "imagesTs/RibFrac_" + str(case_id).zfill(4) + "_0000.nii.gz"
        copyfile(filename, join(save_path, target_name))
    print("Finished processing test data.")
77
+
78
+
79
def load_filenames(img_dir, extensions=None):
    """Return naturally sorted file paths inside img_dir, optionally filtered
    by extension(s) — anything accepted by str.endswith (string or tuple)."""
    directory = fix_path(img_dir)
    matches = [directory + entry
               for entry in os.listdir(directory)
               if extensions is None or entry.endswith(extensions)]
    return natsorted(np.asarray(matches))
90
+
91
+
92
def fix_path(path):
    """Append a trailing slash to the directory path if it is missing."""
    if path[-1] == "/":
        return path
    return path + "/"
96
+
97
+
98
def load_image(filepath, return_meta=False, is_seg=False):
    """Read an image with SimpleITK and return it as a numpy array.

    :param filepath: path to the image file
    :param return_meta: if True, also return (spacing, affine, header);
        affine is always None here (not exposed by SimpleITK like NiBabel's
        image.affine)
    :param is_seg: if True, round and cast the array to int8
    """
    image = sitk.ReadImage(filepath)
    image_np = sitk.GetArrayFromImage(image)

    if is_seg:
        image_np = np.rint(image_np)
        image_np = image_np.astype(np.int8)  # In special cases segmentations can contain negative labels, so no np.uint8

    if not return_meta:
        return image_np
    else:
        spacing = image.GetSpacing()
        keys = image.GetMetaDataKeys()
        header = {key:image.GetMetaData(key) for key in keys}
        affine = None  # How do I get the affine transform with SimpleITK? With NiBabel it is just image.affine
        return image_np, spacing, affine, header
114
+
115
+
116
def save_image(filename, image, spacing=None, affine=None, header=None, is_seg=False, mp_pool=None, free_mem=False):
    """Write a numpy array to disk via SimpleITK.

    :param spacing: voxel spacing to set on the output image, if given
    :param affine: currently ignored (no SimpleITK equivalent implemented)
    :param header: dict of metadata key/value pairs to copy onto the image
    :param is_seg: if True, round and cast to int8 before saving
    :param mp_pool: if given, the actual write is offloaded to this pool
    :param free_mem: if True, drop the local reference and run gc afterwards
    """
    if is_seg:
        image = np.rint(image)
        image = image.astype(np.int8)  # In special cases segmentations can contain negative labels, so no np.uint8

    image = sitk.GetImageFromArray(image)

    if header is not None:
        [image.SetMetaData(key, header[key]) for key in header.keys()]

    if spacing is not None:
        image.SetSpacing(spacing)

    if affine is not None:
        pass  # How do I set the affine transform with SimpleITK? With NiBabel it is just nib.Nifti1Image(img, affine=affine, header=header)

    if mp_pool is None:
        # synchronous write
        sitk.WriteImage(image, filename)
        if free_mem:
            del image
            gc.collect()
    else:
        # asynchronous write via the worker pool
        mp_pool.apply_async(_save, args=(filename, image, free_mem,))
        if free_mem:
            del image
            gc.collect()


def _save(filename, image, free_mem):
    # Pool worker: write the image and optionally release its memory.
    sitk.WriteImage(image, filename)
    if free_mem:
        del image
        gc.collect()
149
+
150
+
151
if __name__ == "__main__":
    # Note: Due to a bug in SimpleITK 2.1.x a version of SimpleITK < 2.1.0 is required for loading images. Further, we can't copy the images and masks, but have to load them and resample both to the same spacing.
    # Conversion instructions:
    # 1. All sets, parts and CSVs need to be downloaded from https://ribfrac.grand-challenge.org/dataset/
    # 2. Unzip ribfrac-train-images-1.zip (will be unzipped as Part1) and ribfrac-train-images-2.zip (will be unzipped as Part2), move content from Part2 to Part1 and rename the folder to imagesTr
    # 3. Unzip ribfrac-train-labels-1.zip (will be unzipped as Part1) and ribfrac-train-labels-2.zip (will be unzipped as Part2), move content from Part2 to Part1 and rename the folder to labelsTr
    # 4. Unzip ribfrac-val-images.zip and add content to imagesTr, repeat with ribfrac-val-labels.zip
    # 5. Unzip ribfrac-test-images.zip and rename it to imagesTs

    # worker pool used for the parallel per-case conversion
    pool = mp.Pool(processes=20)

    # NOTE(review): hard-coded site-specific paths — adjust before running
    dataset_load_path = "/home/k539i/Documents/network_drives/E132-Projekte/Projects/2021_Gotkowski_RibFrac_RibSeg/original/RibFrac/"
    dataset_save_path = "/home/k539i/Documents/network_drives/E132-Projekte/Projects/2021_Gotkowski_RibFrac_RibSeg/preprocessed/Task154_RibFrac_multi_label/"
    preprocess_dataset(dataset_load_path, dataset_save_path, pool)

    print("Still saving images in background...")
    pool.close()
    pool.join()
    print("All tasks finished.")

    # label 5 collects the "unidentified" fractures (label_code -1 in the CSVs,
    # remapped in preprocess_train_single)
    labels = {0: "background", 1: "displaced_rib_fracture", 2: "non_displaced_rib_fracture", 3: "buckle_rib_fracture", 4: "segmental_rib_fracture", 5: "unidentified_rib_fracture"}
    generate_dataset_json(join(dataset_save_path, 'dataset.json'), join(dataset_save_path, "imagesTr"), None, ('CT',), labels, "Task154_RibFrac_multi_label")
nnunet/dataset_conversion/Task155_RibFrac_binary.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import SimpleITK as sitk
2
+ from natsort import natsorted
3
+ import numpy as np
4
+ from pathlib import Path
5
+ import pandas as pd
6
+ from collections import defaultdict
7
+ from shutil import copyfile
8
+ import os
9
+ from os.path import join
10
+ from tqdm import tqdm
11
+ import gc
12
+ import multiprocessing as mp
13
+ from nnunet.dataset_conversion.utils import generate_dataset_json
14
+ from functools import partial
15
+
16
+
17
def preprocess_dataset(dataset_load_path, dataset_save_path, pool):
    """Convert the raw RibFrac download into nnU-Net layout (binary fracture task)."""
    train_image_load_path = join(dataset_load_path, "imagesTr")
    train_mask_load_path = join(dataset_load_path, "labelsTr")
    test_image_load_path = join(dataset_load_path, "imagesTs")

    ribfrac_train_info_1_path = join(dataset_load_path, "ribfrac-train-info-1.csv")
    ribfrac_train_info_2_path = join(dataset_load_path, "ribfrac-train-info-2.csv")
    ribfrac_val_info_path = join(dataset_load_path, "ribfrac-val-info.csv")

    # create the output folder structure up front
    train_image_save_path = join(dataset_save_path, "imagesTr")
    train_mask_save_path = join(dataset_save_path, "labelsTr")
    test_image_save_path = join(dataset_save_path, "imagesTs")
    Path(train_image_save_path).mkdir(parents=True, exist_ok=True)
    Path(train_mask_save_path).mkdir(parents=True, exist_ok=True)
    Path(test_image_save_path).mkdir(parents=True, exist_ok=True)

    meta_data = preprocess_csv(ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path)
    preprocess_train(train_image_load_path, train_mask_load_path, meta_data, dataset_save_path, pool)
    preprocess_test(test_image_load_path, dataset_save_path)


def preprocess_csv(ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path):
    """Collect per-case fracture metadata (instance id + class code) from the three info CSVs."""
    print("Processing csv...")
    meta_data = defaultdict(list)
    for csv_path in [ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path]:
        df = pd.read_csv(csv_path)
        for index, row in df.iterrows():
            name = row["public_id"]
            instance = row["label_id"]
            class_label = row["label_code"]
            meta_data[name].append({"instance": instance, "class_label": class_label})
    print("Finished csv processing.")
    return meta_data


def preprocess_train(image_path, mask_path, meta_data, save_path, pool):
    """Convert all training cases in parallel via the given multiprocessing pool."""
    print("Processing train data...")
    pool.map(partial(preprocess_train_single, image_path=image_path, mask_path=mask_path, meta_data=meta_data, save_path=save_path), meta_data.keys())
    print("Finished processing train data.")
56
+
57
+
58
def preprocess_train_single(name, image_path, mask_path, meta_data, save_path):
    """Convert one RibFrac training case to a binary fracture segmentation.

    Every annotated fracture instance becomes foreground (1). This includes
    instances with label_code -1 ("unidentified" fractures, cf. Task154 where
    they are remapped to class 5): the previous `class_label > 0` check left
    them as -1 in the saved mask, which is not part of this task's declared
    label set {0: background, 1: fracture}.
    """
    case_id = int(name[7:])  # strip the 'RibFrac' prefix
    image, _, _, _ = load_image(join(image_path, name + "-image.nii.gz"), return_meta=True, is_seg=False)
    instance_seg_mask, spacing, _, _ = load_image(join(mask_path, name + "-label.nii.gz"), return_meta=True, is_seg=True)
    semantic_seg_mask = np.zeros_like(instance_seg_mask, dtype=int)
    for entry in meta_data[name]:
        # any annotated instance (label_code != 0, including -1) is a fracture
        binary_label = 0 if entry["class_label"] == 0 else 1
        semantic_seg_mask[instance_seg_mask == entry["instance"]] = binary_label
    save_image(join(save_path, "imagesTr/RibFrac_" + str(case_id).zfill(4) + "_0000.nii.gz"), image, spacing=spacing, is_seg=False)
    save_image(join(save_path, "labelsTr/RibFrac_" + str(case_id).zfill(4) + ".nii.gz"), semantic_seg_mask, spacing=spacing, is_seg=True)
70
+
71
+
72
def preprocess_test(load_test_image_dir, save_path):
    """Copy the test images (no labels available) into imagesTs with nnU-Net naming."""
    print("Processing test data...")
    filenames = load_filenames(load_test_image_dir)
    for filename in tqdm(filenames):
        # basename looks like RibFracXXX-image.nii.gz -> XXX is the case id
        id = int(os.path.basename(filename)[8:-13])
        copyfile(filename, join(save_path, "imagesTs/RibFrac_" + str(id).zfill(4) + "_0000.nii.gz"))
    print("Finished processing test data.")


def load_filenames(img_dir, extensions=None):
    """Return naturally sorted file paths in img_dir, optionally filtered by
    extension(s) — anything accepted by str.endswith (string or tuple)."""
    _img_dir = fix_path(img_dir)
    img_filenames = []

    for file in os.listdir(_img_dir):
        if extensions is None or file.endswith(extensions):
            img_filenames.append(_img_dir + file)
    img_filenames = np.asarray(img_filenames)
    img_filenames = natsorted(img_filenames)

    return img_filenames


def fix_path(path):
    """Ensure the directory path ends with a trailing slash."""
    if path[-1] != "/":
        path += "/"
    return path
98
+
99
+
100
def load_image(filepath, return_meta=False, is_seg=False):
    """Read an image with SimpleITK and return it as a numpy array.

    If return_meta is True, also returns (spacing, affine, header); affine is
    always None here (not exposed by SimpleITK like NiBabel's image.affine).
    """
    image = sitk.ReadImage(filepath)
    image_np = sitk.GetArrayFromImage(image)

    if is_seg:
        image_np = np.rint(image_np)
        image_np = image_np.astype(np.int8)  # In special cases segmentations can contain negative labels, so no np.uint8

    if not return_meta:
        return image_np
    else:
        spacing = image.GetSpacing()
        keys = image.GetMetaDataKeys()
        header = {key:image.GetMetaData(key) for key in keys}
        affine = None  # How do I get the affine transform with SimpleITK? With NiBabel it is just image.affine
        return image_np, spacing, affine, header


def save_image(filename, image, spacing=None, affine=None, header=None, is_seg=False, mp_pool=None, free_mem=False):
    """Write a numpy array to disk via SimpleITK, optionally asynchronously
    through mp_pool; free_mem drops the local reference and runs gc afterwards."""
    if is_seg:
        image = np.rint(image)
        image = image.astype(np.int8)  # In special cases segmentations can contain negative labels, so no np.uint8

    image = sitk.GetImageFromArray(image)

    if header is not None:
        [image.SetMetaData(key, header[key]) for key in header.keys()]

    if spacing is not None:
        image.SetSpacing(spacing)

    if affine is not None:
        pass  # How do I set the affine transform with SimpleITK? With NiBabel it is just nib.Nifti1Image(img, affine=affine, header=header)

    if mp_pool is None:
        # synchronous write
        sitk.WriteImage(image, filename)
        if free_mem:
            del image
            gc.collect()
    else:
        # asynchronous write via the worker pool
        mp_pool.apply_async(_save, args=(filename, image, free_mem,))
        if free_mem:
            del image
            gc.collect()


def _save(filename, image, free_mem):
    # Pool worker: write the image and optionally release its memory.
    sitk.WriteImage(image, filename)
    if free_mem:
        del image
        gc.collect()
151
+
152
+
153
if __name__ == "__main__":
    # Note: Due to a bug in SimpleITK 2.1.x a version of SimpleITK < 2.1.0 is required for loading images. Further, we can't copy the images and masks, but have to load them and resample both to the same spacing.
    # Conversion instructions:
    # 1. All sets, parts and CSVs need to be downloaded from https://ribfrac.grand-challenge.org/dataset/
    # 2. Unzip ribfrac-train-images-1.zip (will be unzipped as Part1) and ribfrac-train-images-2.zip (will be unzipped as Part2), move content from Part2 to Part1 and rename the folder to imagesTr
    # 3. Unzip ribfrac-train-labels-1.zip (will be unzipped as Part1) and ribfrac-train-labels-2.zip (will be unzipped as Part2), move content from Part2 to Part1 and rename the folder to labelsTr
    # 4. Unzip ribfrac-val-images.zip and add content to imagesTr, repeat with ribfrac-val-labels.zip
    # 5. Unzip ribfrac-test-images.zip and rename it to imagesTs

    # worker pool used for the parallel per-case conversion
    pool = mp.Pool(processes=20)

    # NOTE(review): hard-coded site-specific paths — adjust before running
    dataset_load_path = "/home/k539i/Documents/network_drives/E132-Projekte/Projects/2021_Gotkowski_RibFrac_RibSeg/original/RibFrac/"
    dataset_save_path = "/home/k539i/Documents/network_drives/E132-Projekte/Projects/2021_Gotkowski_RibFrac_RibSeg/preprocessed/Task155_RibFrac_binary/"
    preprocess_dataset(dataset_load_path, dataset_save_path, pool)

    print("Still saving images in background...")
    pool.close()
    pool.join()
    print("All tasks finished.")

    labels = {0: "background", 1: "fracture"}
    generate_dataset_json(join(dataset_save_path, 'dataset.json'), join(dataset_save_path, "imagesTr"), None, ('CT',), labels, "Task155_RibFrac_binary")
nnunet/dataset_conversion/Task156_RibSeg.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from natsort import natsorted
2
+ import numpy as np
3
+ from pathlib import Path
4
+ import os
5
+ from os.path import join
6
+ from nnunet.dataset_conversion.utils import generate_dataset_json
7
+ import SimpleITK as sitk
8
+ import gc
9
+ import multiprocessing as mp
10
+ from functools import partial
11
+
12
+
13
def preprocess_dataset(ribfrac_load_path, ribseg_load_path, dataset_save_path, pool):
    """Pair RibFrac CT images with RibSeg rib masks and convert them to nnU-Net layout.

    NOTE(review): the workers in preprocess_single write relative to the
    module-level `dataset_save_path` (set under __main__), not to this
    parameter — confirm both always point to the same location.
    """
    mask_load_path = join(ribseg_load_path, "labelsTr")

    # create the output folder structure up front
    train_image_save_path = join(dataset_save_path, "imagesTr")
    train_mask_save_path = join(dataset_save_path, "labelsTr")
    test_image_save_path = join(dataset_save_path, "imagesTs")
    test_labels_save_path = join(dataset_save_path, "labelsTs")
    Path(train_image_save_path).mkdir(parents=True, exist_ok=True)
    Path(train_mask_save_path).mkdir(parents=True, exist_ok=True)
    Path(test_image_save_path).mkdir(parents=True, exist_ok=True)
    Path(test_labels_save_path).mkdir(parents=True, exist_ok=True)

    mask_filenames = load_filenames(mask_load_path)
    pool.map(partial(preprocess_single, image_load_path=ribfrac_load_path), mask_filenames)


def preprocess_single(filename, image_load_path):
    """Convert one case: load the RibFrac image and the RibSeg mask and save both.

    Cases with id > 500 are written to imagesTs/labelsTs; centerline
    annotations ("-cl.nii.gz") are skipped.
    """
    name = os.path.basename(filename)
    if "-cl.nii.gz" in name:
        return
    id = int(name.split("-")[0][7:])  # strip the 'RibFrac' prefix
    image_set = "imagesTr"
    mask_set = "labelsTr"
    if id > 500:
        image_set = "imagesTs"
        mask_set = "labelsTs"
    image, _, _, _ = load_image(join(image_load_path, image_set, "RibFrac{}-image.nii.gz".format(id)), return_meta=True, is_seg=False)
    mask, spacing, _, _ = load_image(filename, return_meta=True, is_seg=True)
    # dataset_save_path is a module-level global defined under __main__
    save_image(join(dataset_save_path, image_set, "RibSeg_" + str(id).zfill(4) + "_0000.nii.gz"), image, spacing=spacing, is_seg=False)
    save_image(join(dataset_save_path, mask_set, "RibSeg_" + str(id).zfill(4) + ".nii.gz"), mask, spacing=spacing, is_seg=True)
43
+
44
+
45
def load_filenames(img_dir, extensions=None):
    """Return naturally sorted file paths in img_dir, optionally filtered by
    extension(s) — anything accepted by str.endswith (string or tuple)."""
    _img_dir = fix_path(img_dir)
    img_filenames = []

    for file in os.listdir(_img_dir):
        if extensions is None or file.endswith(extensions):
            img_filenames.append(_img_dir + file)
    img_filenames = np.asarray(img_filenames)
    img_filenames = natsorted(img_filenames)

    return img_filenames


def fix_path(path):
    """Ensure the directory path ends with a trailing slash."""
    if path[-1] != "/":
        path += "/"
    return path


def load_image(filepath, return_meta=False, is_seg=False):
    """Read an image with SimpleITK and return it as a numpy array.

    If return_meta is True, also returns (spacing, affine, header); affine is
    always None here (not exposed by SimpleITK like NiBabel's image.affine).
    """
    image = sitk.ReadImage(filepath)
    image_np = sitk.GetArrayFromImage(image)

    if is_seg:
        image_np = np.rint(image_np)
        image_np = image_np.astype(np.int8)  # In special cases segmentations can contain negative labels, so no np.uint8

    if not return_meta:
        return image_np
    else:
        spacing = image.GetSpacing()
        keys = image.GetMetaDataKeys()
        header = {key:image.GetMetaData(key) for key in keys}
        affine = None  # How do I get the affine transform with SimpleITK? With NiBabel it is just image.affine
        return image_np, spacing, affine, header


def save_image(filename, image, spacing=None, affine=None, header=None, is_seg=False, mp_pool=None, free_mem=False):
    """Write a numpy array to disk via SimpleITK, optionally asynchronously
    through mp_pool; free_mem drops the local reference and runs gc afterwards."""
    if is_seg:
        image = np.rint(image)
        image = image.astype(np.int8)  # In special cases segmentations can contain negative labels, so no np.uint8

    image = sitk.GetImageFromArray(image)

    if header is not None:
        [image.SetMetaData(key, header[key]) for key in header.keys()]

    if spacing is not None:
        image.SetSpacing(spacing)

    if affine is not None:
        pass  # How do I set the affine transform with SimpleITK? With NiBabel it is just nib.Nifti1Image(img, affine=affine, header=header)

    if mp_pool is None:
        # synchronous write
        sitk.WriteImage(image, filename)
        if free_mem:
            del image
            gc.collect()
    else:
        # asynchronous write via the worker pool
        mp_pool.apply_async(_save, args=(filename, image, free_mem,))
        if free_mem:
            del image
            gc.collect()


def _save(filename, image, free_mem):
    # Pool worker: write the image and optionally release its memory.
    sitk.WriteImage(image, filename)
    if free_mem:
        del image
        gc.collect()
115
+
116
+
117
if __name__ == "__main__":
    # Note: Due to a bug in SimpleITK 2.1.x a version of SimpleITK < 2.1.0 is required for loading images. Further, we can't copy the images and masks, but have to load them and resample both to the same spacing.
    # Conversion instructions:
    # 1. All images from both training and validation set of the RibFrac dataset need to be downloaded from https://ribfrac.grand-challenge.org/dataset/ into a new folder named RibFrac
    # 2. The RibSeg masks need to be downloaded from https://zenodo.org/record/5336592 into a new folder named RibSeg
    # 3. Follow unpacking instruction for the RibFrac dataset as in Task154_RibFrac
    # 4. Unzip RibSeg_490_nii.zip from the RibSeg dataset and rename the folder labelsTr

    # NOTE(review): hard-coded site-specific paths — adjust before running
    ribfrac_load_path = "/home/k539i/Documents/datasets/original/RibFrac/"
    ribseg_load_path = "/home/k539i/Documents/datasets/original/RibSeg/"
    dataset_save_path = "/home/k539i/Documents/datasets/preprocessed/Task156_RibSeg/"

    # NOTE(review): max_imagesTr_id is never used — the train/test split
    # threshold (500) is hard-coded inside preprocess_single
    max_imagesTr_id = 500

    # worker pool used for the parallel per-case conversion
    pool = mp.Pool(processes=20)

    preprocess_dataset(ribfrac_load_path, ribseg_load_path, dataset_save_path, pool)

    print("Still saving images in background...")
    pool.close()
    pool.join()
    print("All tasks finished.")

    generate_dataset_json(join(dataset_save_path, 'dataset.json'), join(dataset_save_path, "imagesTr"), None, ('CT',), {0: 'bg', 1: 'rib'}, "Task156_RibSeg")
nnunet/dataset_conversion/Task159_MyoPS2020.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import SimpleITK
2
+ import numpy as np
3
+ from batchgenerators.utilities.file_and_folder_operations import *
4
+ import shutil
5
+
6
+ import SimpleITK as sitk
7
+ from nnunet.paths import nnUNet_raw_data
8
+ from nnunet.dataset_conversion.utils import generate_dataset_json
9
+ from nnunet.utilities.sitk_stuff import copy_geometry
10
+
11
+
12
def convert_labels_to_nnunet(source_nifti: str, target_nifti: str):
    """Remap the MyoPS intensity codes (500/600/200/1220/2221) to the
    consecutive labels 1..5 and write the result with the source geometry."""
    # (MyoPS value, nnU-Net label)
    label_mapping = (
        (500, 1),   # left ventricular (LV) blood pool
        (600, 2),   # right ventricular blood pool
        (200, 3),   # LV normal myocardium
        (1220, 4),  # LV myocardial edema
        (2221, 5),  # LV myocardial scars
    )
    source_img = sitk.ReadImage(source_nifti)
    source_npy = sitk.GetArrayFromImage(source_img)
    nnunet_seg = np.zeros(source_npy.shape, dtype=np.uint8)
    for myops_value, nnunet_label in label_mapping:
        nnunet_seg[source_npy == myops_value] = nnunet_label
    nnunet_seg_itk = copy_geometry(sitk.GetImageFromArray(nnunet_seg), source_img)
    sitk.WriteImage(nnunet_seg_itk, target_nifti)
25
+
26
+
27
def convert_labels_back_to_myops(source_nifti: str, target_nifti: str):
    """Inverse of convert_labels_to_nnunet: map nnU-Net labels 1..5 back to
    the original MyoPS intensity codes, keeping the source geometry."""
    # (nnU-Net label, MyoPS value)
    label_mapping = (
        (1, 500),   # left ventricular (LV) blood pool
        (2, 600),   # right ventricular blood pool
        (3, 200),   # LV normal myocardium
        (4, 1220),  # LV myocardial edema
        (5, 2221),  # LV myocardial scars
    )
    nnunet_itk = sitk.ReadImage(source_nifti)
    nnunet_npy = sitk.GetArrayFromImage(nnunet_itk)
    myops_seg = np.zeros(nnunet_npy.shape, dtype=np.uint8)
    for nnunet_label, myops_value in label_mapping:
        myops_seg[nnunet_npy == nnunet_label] = myops_value
    myops_seg_itk = copy_geometry(sitk.GetImageFromArray(myops_seg), nnunet_itk)
    sitk.WriteImage(myops_seg_itk, target_nifti)
40
+
41
+
42
if __name__ == '__main__':
    # this is where we extracted all the archives. This folder must have the subfolders test20, train25,
    # train25_myops_gd. We do not use test_data_gd because the test GT is encoded and cannot be used as it is
    base = '/home/fabian/Downloads/MyoPS 2020 Dataset'

    # Arbitrary task id. This is just to ensure each dataset has a unique number. Set this to whatever ([0-999]) you
    # want
    task_id = 159
    task_name = "MyoPS2020"

    foldername = "Task%03.0d_%s" % (task_id, task_name)

    # setting up nnU-Net folders
    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    maybe_mkdir_p(imagestr)
    maybe_mkdir_p(imagests)
    maybe_mkdir_p(labelstr)

    imagestr_source = join(base, 'train25')
    imagests_source = join(base, 'test20')
    labelstr_source = join(base, 'train25_myops_gd')

    # convert training set
    nii_files = nifti_files(imagestr_source, join=False)
    # remove their modality identifier. Conveniently it's always 2 characters. np.unique to get the identifiers
    identifiers = np.unique([i[:-len('_C0.nii.gz')] for i in nii_files])
    for i in identifiers:
        # the three MyoPS modalities become nnU-Net channels _0000/_0001/_0002
        shutil.copy(join(imagestr_source, i + "_C0.nii.gz"), join(imagestr, i + '_0000.nii.gz'))
        shutil.copy(join(imagestr_source, i + "_DE.nii.gz"), join(imagestr, i + '_0001.nii.gz'))
        shutil.copy(join(imagestr_source, i + "_T2.nii.gz"), join(imagestr, i + '_0002.nii.gz'))
        convert_labels_to_nnunet(join(labelstr_source, i + '_gd.nii.gz'), join(labelstr, i + '.nii.gz'))

    # test set
    nii_files = nifti_files(imagests_source, join=False)
    # remove their modality identifier. Conveniently it's always 2 characters. np.unique to get the identifiers
    identifiers = np.unique([i[:-len('_C0.nii.gz')] for i in nii_files])
    for i in identifiers:
        shutil.copy(join(imagests_source, i + "_C0.nii.gz"), join(imagests, i + '_0000.nii.gz'))
        shutil.copy(join(imagests_source, i + "_DE.nii.gz"), join(imagests, i + '_0001.nii.gz'))
        shutil.copy(join(imagests_source, i + "_T2.nii.gz"), join(imagests, i + '_0002.nii.gz'))

    generate_dataset_json(join(out_base, 'dataset.json'),
                          imagestr,
                          None,
                          ('C0', 'DE', 'T2'),
                          {
                              0: 'background',
                              1: "left ventricular (LV) blood pool",
                              2: "right ventricular blood pool",
                              3: "LV normal myocardium",
                              4: "LV myocardial edema",
                              5: "LV myocardial scars",
                          },
                          task_name,
                          license='see http://www.sdspeople.fudan.edu.cn/zhuangxiahai/0/myops20/index.html',
                          dataset_description='see http://www.sdspeople.fudan.edu.cn/zhuangxiahai/0/myops20/index.html',
                          dataset_reference='http://www.sdspeople.fudan.edu.cn/zhuangxiahai/0/myops20/index.html',
                          dataset_release='0')

    # REMEMBER THAT TEST SET INFERENCE WILL REQUIRE YOU CONVERT THE LABELS BACK TO THEIR CONVENTION
    # use convert_labels_back_to_myops for that!
    # man I am such a nice person. Love you guys.
nnunet/dataset_conversion/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ from . import *
nnunet/dataset_conversion/utils.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+
17
+ from typing import Tuple
18
+ import numpy as np
19
+ from batchgenerators.utilities.file_and_folder_operations import *
20
+
21
+
22
def get_identifiers_from_splitted_files(folder: str):
    """Return the unique case identifiers found in a folder of nnU-Net images.

    nnU-Net image files are named <identifier>_XXXX.nii.gz, where XXXX is the
    4-digit modality index; stripping that suffix yields the identifier.
    """
    modality_suffix_len = len("_0000.nii.gz")  # replaces the former magic number 12
    uniques = np.unique([i[:-modality_suffix_len] for i in subfiles(folder, suffix='.nii.gz', join=False)])
    return uniques
25
+
26
+
27
def generate_dataset_json(output_file: str, imagesTr_dir: str, imagesTs_dir: str, modalities: Tuple,
                          labels: dict, dataset_name: str, sort_keys=True, license: str = "hands off!", dataset_description: str = "",
                          dataset_reference="", dataset_release='0.0'):
    """
    :param output_file: This needs to be the full path to the dataset.json you intend to write, so
    output_file='DATASET_PATH/dataset.json' where the folder DATASET_PATH points to is the one with the
    imagesTr and labelsTr subfolders
    :param imagesTr_dir: path to the imagesTr folder of that dataset
    :param imagesTs_dir: path to the imagesTs folder of that dataset. Can be None
    :param modalities: tuple of strings with modality names. must be in the same order as the images (first entry
    corresponds to _0000.nii.gz, etc). Example: ('T1', 'T2', 'FLAIR').
    :param labels: dict with int->str (key->value) mapping the label IDs to label names. Note that 0 is always
    supposed to be background! Example: {0: 'background', 1: 'edema', 2: 'enhancing tumor'}
    :param dataset_name: The name of the dataset. Can be anything you want
    :param sort_keys: In order to sort or not, the keys in dataset.json
    :param license:
    :param dataset_description:
    :param dataset_reference: website of the dataset, if available
    :param dataset_release:
    :return:
    """
    train_identifiers = get_identifiers_from_splitted_files(imagesTr_dir)

    if imagesTs_dir is not None:
        test_identifiers = get_identifiers_from_splitted_files(imagesTs_dir)
    else:
        test_identifiers = []

    json_dict = {
        'name': dataset_name,
        'description': dataset_description,
        'tensorImageSize': "4D",
        'reference': dataset_reference,
        # NOTE(review): key spelled 'licence' — presumably kept for backward
        # compatibility with existing dataset.json consumers; do not "fix"
        # without checking downstream readers
        'licence': license,
        'release': dataset_release,
        # modality index i corresponds to the image channel suffix _%04d
        'modality': {str(i): modalities[i] for i in range(len(modalities))},
        'labels': {str(i): labels[i] for i in labels.keys()},
        'numTraining': len(train_identifiers),
        'numTest': len(test_identifiers),
        'training': [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i}
                     for i in train_identifiers],
        'test': ["./imagesTs/%s.nii.gz" % i for i in test_identifiers],
    }

    if not output_file.endswith("dataset.json"):
        print("WARNING: output file name is not dataset.json! This may be intentional or not. You decide. "
              "Proceeding anyways...")
    # os.path.join(output_file) with a single argument was a no-op; write directly
    save_json(json_dict, output_file, sort_keys=sort_keys)
@@ -0,0 +1,2 @@
 
 
 
1
+ from __future__ import absolute_import
2
+ from . import *
nnunet/evaluation/add_dummy_task_with_mean_over_all_tasks.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import numpy as np
17
+ from batchgenerators.utilities.file_and_folder_operations import subfiles
18
+ import os
19
+ from collections import OrderedDict
20
+
21
# Hard-coded folder containing one summary json per (experiment, task).
folder = "/home/fabian/drives/E132-Projekte/Projects/2018_MedicalDecathlon/Leaderboard"
# Experiment descriptions ('name' field of the jsons) that should be aggregated.
task_descriptors = ['2D final 2',
                    '2D final, less pool, dc and topK, fold0',
                    '2D final pseudo3d 7, fold0',
                    '2D final, less pool, dc and ce, fold0',
                    '3D stage0 final 2, fold0',
                    '3D fullres final 2, fold0']
# These tasks have no lowres/stage0 configuration; the fullres result is reused
# for them further below so every experiment covers every task.
task_ids_with_no_stage0 = ["Task001_BrainTumour", "Task004_Hippocampus", "Task005_Prostate"]

# mean_scores[experiment_name][task] = the 'mean over labels of mean over cases'
# score taken from each summary json
mean_scores = OrderedDict()
for t in task_descriptors:
    mean_scores[t] = OrderedDict()

json_files = subfiles(folder, True, None, ".json", True)
json_files = [i for i in json_files if not i.split("/")[-1].startswith(".")]  # stupid mac
for j in json_files:
    with open(j, 'r') as f:
        res = json.load(f)
    task = res['task']
    if task != "Task999_ALL":
        name = res['name']
        if name in task_descriptors:
            if task not in list(mean_scores[name].keys()):
                mean_scores[name][task] = res['results']['mean']['mean']
            else:
                raise RuntimeError("duplicate task %s for description %s" % (task, name))

# tasks without a stage0 model: reuse the fullres result as the stage0 result
for t in task_ids_with_no_stage0:
    mean_scores["3D stage0 final 2, fold0"][t] = mean_scores["3D fullres final 2, fold0"][t]

# union of all tasks seen in any experiment
a = set()
for i in mean_scores.keys():
    a = a.union(list(mean_scores[i].keys()))

# For each experiment: verify it covers all tasks, then write a dummy
# 'Task999_ALL' json holding, per metric, the mean over all tasks.
for i in mean_scores.keys():
    try:
        for t in list(a):
            assert t in mean_scores[i].keys(), "did not find task %s for experiment %s" % (t, i)
        new_res = OrderedDict()
        new_res['name'] = i
        new_res['author'] = "Fabian"
        new_res['task'] = "Task999_ALL"
        new_res['results'] = OrderedDict()
        new_res['results']['mean'] = OrderedDict()
        new_res['results']['mean']['mean'] = OrderedDict()
        tasks = list(mean_scores[i].keys())
        metrics = mean_scores[i][tasks[0]].keys()
        for m in metrics:
            foreground_values = [mean_scores[i][n][m] for n in tasks]
            # nanmean: a NaN score for one task must not poison the global mean
            new_res['results']['mean']["mean"][m] = np.nanmean(foreground_values)
        output_fname = i.replace(" ", "_") + "_globalMean.json"
        with open(os.path.join(folder, output_fname), 'w') as f:
            json.dump(new_res, f)
    except AssertionError:
        print("could not process experiment %s" % i)
        print("did not find task %s for experiment %s" % (t, i))
77
+
nnunet/evaluation/add_mean_dice_to_json.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import numpy as np
17
+ from batchgenerators.utilities.file_and_folder_operations import subfiles
18
+ from collections import OrderedDict
19
+
20
+
21
def foreground_mean(filename):
    """Add a 'mean' entry (mean over all foreground classes, per metric) to the
    results/mean section of a summary json, rewriting the file in place.

    Background (0) and the special ids -1 and 99 are excluded; a '99' entry is
    removed from the json entirely if present.
    """
    with open(filename, 'r') as f:
        summary = json.load(f)
    mean_section = summary['results']['mean']

    class_ids = np.array([int(i) for i in mean_section.keys() if (i != 'mean')])
    for excluded in (0, -1, 99):
        class_ids = class_ids[class_ids != excluded]

    # drop the special '99' entry if the json has one
    if mean_section.get('99') is not None:
        mean_section.pop('99')

    aggregated = OrderedDict()
    for metric in mean_section['1'].keys():
        values = [mean_section[str(c)][metric] for c in class_ids]
        # nanmean so classes with undefined scores do not poison the mean
        aggregated[metric] = np.nanmean(values)
    mean_section["mean"] = aggregated

    with open(filename, 'w') as f:
        json.dump(summary, f, indent=4, sort_keys=True)
40
+
41
+
42
def run_in_folder(folder):
    """Apply foreground_mean to every summary json in `folder`.

    Hidden files (macOS artefacts) and previously generated *_globalMean.json
    files are skipped.
    """
    for json_file in subfiles(folder, True, None, ".json", True):
        hidden = json_file.split("/")[-1].startswith(".")  # stupid mac
        if hidden or json_file.endswith("_globalMean.json"):
            continue
        foreground_mean(json_file)
47
+
48
+
49
if __name__ == "__main__":
    # Post-process all summary jsons in this (hard-coded) results folder.
    folder = "/media/fabian/Results/nnUNetOutput_final/summary_jsons"
    run_in_folder(folder)
nnunet/evaluation/collect_results_files.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import shutil
17
+ from batchgenerators.utilities.file_and_folder_operations import subdirs, subfiles
18
+
19
+
20
def crawl_and_copy(current_folder, out_folder, prefix="fabian_", suffix="ummary.json"):
    """Recursively crawl `current_folder` and copy every file ending with
    `suffix` into the flat folder `out_folder`.

    Files are only copied from folders whose path contains 'fold0'. The copied
    file name is prefixed with the chain of subfolder names traversed so far
    (joined by '__') so that names stay unique in the flat output folder.

    :param current_folder: folder to crawl (recursively)
    :param out_folder: destination folder (flat)
    :param prefix: prefix accumulated along the recursion
    :param suffix: only files ending with this suffix are copied
    :return: None
    """
    matching_files = [name for name in subfiles(current_folder, join=False)
                      if name.endswith(suffix)]
    if "fold0" in current_folder:
        for name in matching_files:
            shutil.copy(os.path.join(current_folder, name),
                        os.path.join(out_folder, prefix + name))
    for sub in subdirs(current_folder, join=False):
        # first level gets no '__' separator, deeper levels do
        extension = sub if prefix == "" else "__" + sub
        crawl_and_copy(os.path.join(current_folder, sub), out_folder,
                       prefix=prefix + extension)
41
+
42
+
43
if __name__ == "__main__":
    from nnunet.paths import network_training_output_dir
    # Collect all fold0 summary jsons from the training output tree into one
    # flat leaderboard folder, then append the foreground means to each json.
    output_folder = "/home/fabian/PhD/results/nnUNetV2/leaderboard"
    crawl_and_copy(network_training_output_dir, output_folder)
    from nnunet.evaluation.add_mean_dice_to_json import run_in_folder
    run_in_folder(output_folder)
nnunet/evaluation/evaluator.py ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import collections
17
+ import inspect
18
+ import json
19
+ import hashlib
20
+ from datetime import datetime
21
+ from multiprocessing.pool import Pool
22
+ import numpy as np
23
+ import pandas as pd
24
+ import SimpleITK as sitk
25
+ from nnunet.evaluation.metrics import ConfusionMatrix, ALL_METRICS
26
+ from batchgenerators.utilities.file_and_folder_operations import save_json, subfiles, join
27
+ from collections import OrderedDict
28
+
29
+
30
class Evaluator:
    """Object that holds test and reference segmentations with label information
    and computes a number of metrics on the two. 'labels' must either be an
    iterable of numeric values (or tuples thereof) or a dictionary with string
    names and numeric values.
    """

    default_metrics = [
        "False Positive Rate",
        "Dice",
        "Jaccard",
        "Precision",
        "Recall",
        "Accuracy",
        "False Omission Rate",
        "Negative Predictive Value",
        "False Negative Rate",
        "True Negative Rate",
        "False Discovery Rate",
        "Total Positives Test",
        "Total Positives Reference"
    ]

    default_advanced_metrics = [
        #"Hausdorff Distance",
        "Hausdorff Distance 95",
        #"Avg. Surface Distance",
        #"Avg. Symmetric Surface Distance"
    ]

    def __init__(self,
                 test=None,
                 reference=None,
                 labels=None,
                 metrics=None,
                 advanced_metrics=None,
                 nan_for_nonexisting=True):
        """
        :param test: test (predicted) segmentation array
        :param reference: reference (ground truth) segmentation array
        :param labels: dict (int->str), set, tuple, list or numpy array of label ids;
            if None, labels are constructed from the unique values of the inputs
        :param metrics: metric names to evaluate (default: default_metrics)
        :param advanced_metrics: additional, more expensive metrics
            (default: default_advanced_metrics); only computed when
            evaluate(advanced=True)
        :param nan_for_nonexisting: return NaN (instead of 0) for metrics that
            are undefined because a label is absent from test and/or reference
        """
        self.test = None
        self.reference = None
        self.confusion_matrix = ConfusionMatrix()
        self.labels = None
        self.nan_for_nonexisting = nan_for_nonexisting
        self.result = None

        # copy the metric lists so that later modification cannot alter the
        # class-level defaults
        self.metrics = []
        if metrics is None:
            for m in self.default_metrics:
                self.metrics.append(m)
        else:
            for m in metrics:
                self.metrics.append(m)

        self.advanced_metrics = []
        if advanced_metrics is None:
            for m in self.default_advanced_metrics:
                self.advanced_metrics.append(m)
        else:
            for m in advanced_metrics:
                self.advanced_metrics.append(m)

        self.set_reference(reference)
        self.set_test(test)
        if labels is not None:
            self.set_labels(labels)
        else:
            if test is not None and reference is not None:
                self.construct_labels()

    def set_test(self, test):
        """Set the test segmentation."""

        self.test = test

    def set_reference(self, reference):
        """Set the reference segmentation."""

        self.reference = reference

    def set_labels(self, labels):
        """Set the labels.
        :param labels: may be a dictionary (int->str), a set (of ints), a tuple (of ints) or a list (of ints). Labels
        will only have names if you pass a dictionary"""

        if isinstance(labels, dict):
            self.labels = collections.OrderedDict(labels)
        elif isinstance(labels, set):
            self.labels = list(labels)
        elif isinstance(labels, np.ndarray):
            self.labels = [i for i in labels]
        elif isinstance(labels, (list, tuple)):
            self.labels = labels
        else:
            raise TypeError("Can only handle dict, list, tuple, set & numpy array, but input is of type {}".format(type(labels)))

    def construct_labels(self):
        """Construct label set from unique entries in segmentations."""

        if self.test is None and self.reference is None:
            raise ValueError("No test or reference segmentations.")
        elif self.test is None:
            labels = np.unique(self.reference)
        else:
            labels = np.union1d(np.unique(self.test),
                                np.unique(self.reference))
        self.labels = list(map(lambda x: int(x), labels))

    def set_metrics(self, metrics):
        """Set evaluation metrics"""

        if isinstance(metrics, set):
            self.metrics = list(metrics)
        elif isinstance(metrics, (list, tuple, np.ndarray)):
            self.metrics = metrics
        else:
            raise TypeError("Can only handle list, tuple, set & numpy array, but input is of type {}".format(type(metrics)))

    def add_metric(self, metric):
        """Add a single metric if it is not already configured."""

        if metric not in self.metrics:
            self.metrics.append(metric)

    def evaluate(self, test=None, reference=None, advanced=False, **metric_kwargs):
        """Compute metrics for segmentations.

        :param test: optional new test segmentation (replaces the stored one)
        :param reference: optional new reference segmentation
        :param advanced: if True, also compute the advanced metrics
        :param metric_kwargs: forwarded to every metric function
        :return: dict mapping label name -> metric name -> value
        """
        if test is not None:
            self.set_test(test)

        if reference is not None:
            self.set_reference(reference)

        if self.test is None or self.reference is None:
            raise ValueError("Need both test and reference segmentations.")

        if self.labels is None:
            self.construct_labels()

        self.metrics.sort()

        # get functions for evaluation
        # somewhat convoluted, but allows users to define additonal metrics
        # on the fly, e.g. inside an IPython console
        _funcs = {m: ALL_METRICS[m] for m in self.metrics + self.advanced_metrics}
        frames = inspect.getouterframes(inspect.currentframe())
        for metric in self.metrics:
            for f in frames:
                if metric in f[0].f_locals:
                    _funcs[metric] = f[0].f_locals[metric]
                    break
            else:
                if metric in _funcs:
                    continue
                else:
                    raise NotImplementedError(
                        "Metric {} not implemented.".format(metric))

        # get results
        self.result = OrderedDict()

        # BUGFIX: build a NEW list. The previous `eval_metrics = self.metrics`
        # followed by `eval_metrics += self.advanced_metrics` mutated
        # self.metrics in place, permanently appending the advanced metrics on
        # every call with advanced=True.
        eval_metrics = list(self.metrics)
        if advanced:
            eval_metrics += self.advanced_metrics

        if isinstance(self.labels, dict):

            for label, name in self.labels.items():
                k = str(name)
                self.result[k] = OrderedDict()
                if not hasattr(label, "__iter__"):
                    self.confusion_matrix.set_test(self.test == label)
                    self.confusion_matrix.set_reference(self.reference == label)
                else:
                    # a tuple/list of label ids is treated as the union region
                    current_test = 0
                    current_reference = 0
                    for l in label:
                        current_test += (self.test == l)
                        current_reference += (self.reference == l)
                    self.confusion_matrix.set_test(current_test)
                    self.confusion_matrix.set_reference(current_reference)
                for metric in eval_metrics:
                    self.result[k][metric] = _funcs[metric](confusion_matrix=self.confusion_matrix,
                                                            nan_for_nonexisting=self.nan_for_nonexisting,
                                                            **metric_kwargs)

        else:

            for i, l in enumerate(self.labels):
                k = str(l)
                self.result[k] = OrderedDict()
                self.confusion_matrix.set_test(self.test == l)
                self.confusion_matrix.set_reference(self.reference == l)
                for metric in eval_metrics:
                    self.result[k][metric] = _funcs[metric](confusion_matrix=self.confusion_matrix,
                                                            nan_for_nonexisting=self.nan_for_nonexisting,
                                                            **metric_kwargs)

        return self.result

    def to_dict(self):
        """Return the result dict, computing it first if necessary."""

        if self.result is None:
            self.evaluate()
        return self.result

    def to_array(self):
        """Return result as numpy array (labels x metrics)."""

        if self.result is None:
            # BUGFIX: was `self.evaluate` (bare attribute access, never called)
            self.evaluate()

        result_metrics = sorted(self.result[list(self.result.keys())[0]].keys())

        a = np.zeros((len(self.labels), len(result_metrics)), dtype=np.float32)

        if isinstance(self.labels, dict):
            for i, label in enumerate(self.labels.keys()):
                for j, metric in enumerate(result_metrics):
                    # BUGFIX: evaluate() stores results under str(name)
                    a[i][j] = self.result[str(self.labels[label])][metric]
        else:
            for i, label in enumerate(self.labels):
                for j, metric in enumerate(result_metrics):
                    # BUGFIX: evaluate() stores results under str(label)
                    a[i][j] = self.result[str(label)][metric]

        return a

    def to_pandas(self):
        """Return result as pandas DataFrame."""

        a = self.to_array()

        if isinstance(self.labels, dict):
            labels = list(self.labels.values())
        else:
            labels = self.labels

        result_metrics = sorted(self.result[list(self.result.keys())[0]].keys())

        return pd.DataFrame(a, index=labels, columns=result_metrics)
267
+
268
+
269
class NiftiEvaluator(Evaluator):
    """Evaluator variant whose test/reference setters accept nifti file paths.

    Images are read with SimpleITK and converted to numpy arrays; the sitk
    images themselves are kept so that metadata (voxel spacing) is available
    at evaluation time.
    """

    def __init__(self, *args, **kwargs):
        # keep the loaded sitk images for spacing lookup in evaluate()
        self.test_nifti = None
        self.reference_nifti = None
        super(NiftiEvaluator, self).__init__(*args, **kwargs)

    def set_test(self, test):
        """Set the test segmentation.

        :param test: path to a nifti file, or None to clear"""

        if test is not None:
            self.test_nifti = sitk.ReadImage(test)
            super(NiftiEvaluator, self).set_test(sitk.GetArrayFromImage(self.test_nifti))
        else:
            self.test_nifti = None
            super(NiftiEvaluator, self).set_test(test)

    def set_reference(self, reference):
        """Set the reference segmentation.

        :param reference: path to a nifti file, or None to clear"""

        if reference is not None:
            self.reference_nifti = sitk.ReadImage(reference)
            super(NiftiEvaluator, self).set_reference(sitk.GetArrayFromImage(self.reference_nifti))
        else:
            self.reference_nifti = None
            super(NiftiEvaluator, self).set_reference(reference)

    def evaluate(self, test=None, reference=None, voxel_spacing=None, **metric_kwargs):
        # If not given, derive voxel spacing from the test image; reversed
        # because sitk spacing is (x, y, z) while GetArrayFromImage returns
        # arrays in (z, y, x) order.
        # NOTE(review): an explicitly passed voxel_spacing is NOT forwarded to
        # the metric functions via metric_kwargs — confirm this is intended.
        if voxel_spacing is None:
            voxel_spacing = np.array(self.test_nifti.GetSpacing())[::-1]
            metric_kwargs["voxel_spacing"] = voxel_spacing

        return super(NiftiEvaluator, self).evaluate(test, reference, **metric_kwargs)
304
+
305
+
306
def run_evaluation(args):
    """Evaluate a single (test, reference) pair; used as a multiprocessing worker.

    :param args: tuple of (test, reference, evaluator instance, metric kwargs dict)
    :return: score dict from evaluator.evaluate(); when test/reference were
        given as path strings they are recorded under 'test'/'reference' keys
    """
    test, ref, evaluator, metric_kwargs = args

    evaluator.set_test(test)
    evaluator.set_reference(ref)
    if evaluator.labels is None:
        evaluator.construct_labels()

    scores = evaluator.evaluate(**metric_kwargs)

    # remember the file paths so the aggregated json stays traceable
    if type(test) == str:
        scores["test"] = test
    if type(ref) == str:
        scores["reference"] = ref
    return scores
319
+
320
+
321
def aggregate_scores(test_ref_pairs,
                     evaluator=NiftiEvaluator,
                     labels=None,
                     nanmean=True,
                     json_output_file=None,
                     json_name="",
                     json_description="",
                     json_author="Fabian",
                     json_task="",
                     num_threads=2,
                     **metric_kwargs):
    """Evaluate many (test, reference) pairs in parallel and average the scores.

    test = predicted image
    :param test_ref_pairs: list of (test, reference) pairs (typically file paths)
    :param evaluator: Evaluator class or instance used for every pair
    :param labels: must be a dict of int-> str or a list of int
    :param nanmean: if True, NaN scores are ignored when averaging (np.nanmean)
    :param json_output_file: if given, the aggregated scores are saved here
    :param json_name:
    :param json_description:
    :param json_author:
    :param json_task:
    :param num_threads: size of the multiprocessing pool
    :param metric_kwargs: forwarded to every metric function
    :return: dict with per-case scores under 'all' and per-label means under 'mean'
    """

    # allow passing the class itself instead of an instance
    if type(evaluator) == type:
        evaluator = evaluator()

    if labels is not None:
        evaluator.set_labels(labels)

    all_scores = OrderedDict()
    all_scores["all"] = []
    all_scores["mean"] = OrderedDict()

    test = [i[0] for i in test_ref_pairs]
    ref = [i[1] for i in test_ref_pairs]
    # the evaluator is pickled into each worker, so sharing one instance is safe
    p = Pool(num_threads)
    all_res = p.map(run_evaluation, zip(test, ref, [evaluator]*len(ref), [metric_kwargs]*len(ref)))
    p.close()
    p.join()

    for i in range(len(all_res)):
        all_scores["all"].append(all_res[i])

        # append score list for mean
        for label, score_dict in all_res[i].items():
            # 'test'/'reference' entries are file paths, not scores
            if label in ("test", "reference"):
                continue
            if label not in all_scores["mean"]:
                all_scores["mean"][label] = OrderedDict()
            for score, value in score_dict.items():
                if score not in all_scores["mean"][label]:
                    all_scores["mean"][label][score] = []
                all_scores["mean"][label][score].append(value)

    # collapse the per-case score lists into their (nan)mean
    for label in all_scores["mean"]:
        for score in all_scores["mean"][label]:
            if nanmean:
                all_scores["mean"][label][score] = float(np.nanmean(all_scores["mean"][label][score]))
            else:
                all_scores["mean"][label][score] = float(np.mean(all_scores["mean"][label][score]))

    # save to file if desired
    # we create a hopefully unique id by hashing the entire output dictionary
    if json_output_file is not None:
        json_dict = OrderedDict()
        json_dict["name"] = json_name
        json_dict["description"] = json_description
        timestamp = datetime.today()
        json_dict["timestamp"] = str(timestamp)
        json_dict["task"] = json_task
        json_dict["author"] = json_author
        json_dict["results"] = all_scores
        json_dict["id"] = hashlib.md5(json.dumps(json_dict).encode("utf-8")).hexdigest()[:12]
        save_json(json_dict, json_output_file)


    return all_scores
401
+
402
+
403
def aggregate_scores_for_experiment(score_file,
                                    labels=None,
                                    metrics=Evaluator.default_metrics,
                                    nanmean=True,
                                    json_output_file=None,
                                    json_name="",
                                    json_description="",
                                    json_author="Fabian",
                                    json_task=""):
    """Turn a saved score array (cases x labels x metrics, .npy) into the
    standard results json structure with per-case results and per-label means.

    :param score_file: path to a numpy file of shape (cases, labels, metrics)
    :param labels: label names; defaults to stringified indices
    :param metrics: metric names matching the last array axis
    :param nanmean: unused here (kept for interface compatibility)
    :param json_output_file: if given, the json is written to this path
    :return: the assembled json dict
    """
    scores = np.load(score_file)
    scores_mean = scores.mean(0)
    if labels is None:
        labels = list(map(str, range(scores.shape[1])))

    per_case_results = []
    mean_results = OrderedDict()
    for case_idx in range(scores.shape[0]):
        case_result = OrderedDict()
        for label_idx, label in enumerate(labels):
            case_result[label] = OrderedDict()
            mean_results[label] = OrderedDict()
            for metric_idx, metric in enumerate(metrics):
                case_result[label][metric] = float(scores[case_idx][label_idx][metric_idx])
                mean_results[label][metric] = float(scores_mean[label_idx][metric_idx])
        per_case_results.append(case_result)

    json_dict = OrderedDict()
    json_dict["name"] = json_name
    json_dict["description"] = json_description
    timestamp = datetime.today()
    json_dict["timestamp"] = str(timestamp)
    json_dict["task"] = json_task
    json_dict["author"] = json_author
    json_dict["results"] = {"all": per_case_results, "mean": mean_results}
    # hopefully-unique id: hash of the entire output dictionary
    json_dict["id"] = hashlib.md5(json.dumps(json_dict).encode("utf-8")).hexdigest()[:12]
    if json_output_file is not None:
        with open(json_output_file, "w") as f:
            json.dump(json_dict, f, indent=4, separators=(",", ": "))

    return json_dict
444
+
445
+
446
def evaluate_folder(folder_with_gts: str, folder_with_predictions: str, labels: tuple, **metric_kwargs):
    """
    writes a summary.json to folder_with_predictions
    :param folder_with_gts: folder where the ground truth segmentations are saved. Must be nifti files.
    :param folder_with_predictions: folder where the predicted segmentations are saved. Must be nifti files.
    :param labels: tuple of int with the labels in the dataset. For example (0, 1, 2, 3) for Task001_BrainTumour.
    :return: aggregated score dict (see aggregate_scores)
    """
    files_gt = subfiles(folder_with_gts, suffix=".nii.gz", join=False)
    files_pred = subfiles(folder_with_predictions, suffix=".nii.gz", join=False)

    # the two folders must contain exactly matching file names
    missing_pred = [i for i in files_gt if i not in files_pred]
    assert not missing_pred, "files missing in folder_with_predictions"
    missing_gt = [i for i in files_pred if i not in files_gt]
    assert not missing_gt, "files missing in folder_with_gts"

    test_ref_pairs = [(join(folder_with_predictions, i), join(folder_with_gts, i)) for i in files_pred]
    return aggregate_scores(test_ref_pairs,
                            json_output_file=join(folder_with_predictions, "summary.json"),
                            num_threads=8, labels=labels, **metric_kwargs)
462
+
463
+
464
def nnunet_evaluate_folder():
    """Console entry point: evaluate a folder of predicted segmentations
    against a folder of reference segmentations (see evaluate_folder).

    Parses -ref, -pred and -l from the command line; writes summary.json
    into the prediction folder.
    """
    import argparse
    parser = argparse.ArgumentParser("Evaluates the segmentations located in the folder pred. Output of this script is "
                                     "a json file. At the very bottom of the json file is going to be a 'mean' "
                                     "entry with averages metrics across all cases")
    parser.add_argument('-ref', required=True, type=str, help="Folder containing the reference segmentations in nifti "
                                                              "format.")
    parser.add_argument('-pred', required=True, type=str, help="Folder containing the predicted segmentations in nifti "
                                                               "format. File names must match between the folders!")
    parser.add_argument('-l', nargs='+', type=int, required=True, help="List of label IDs (integer values) that should "
                                                                       "be evaluated. Best practice is to use all int "
                                                                       "values present in the dataset, so for example "
                                                                       "for LiTS the labels are 0: background, 1: "
                                                                       "liver, 2: tumor. So this argument "
                                                                       "should be -l 1 2. You can if you want also "
                                                                       "evaluate the background label (0) but in "
                                                                       "this case that would not give any useful "
                                                                       "information.")
    args = parser.parse_args()
    return evaluate_folder(args.ref, args.pred, args.l)
nnunet/evaluation/metrics.py ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ from medpy import metric
17
+
18
+
19
def assert_shape(test, reference):
    """Raise AssertionError if the two arrays do not have identical shapes."""
    if test.shape != reference.shape:
        raise AssertionError("Shape mismatch: {} and {}".format(
            test.shape, reference.shape))
23
+
24
+
25
class ConfusionMatrix:
    """Lazily computed confusion matrix (TP/FP/TN/FN) between a binary test
    and reference segmentation, plus emptiness/fullness flags.

    Setting a new test or reference invalidates the cached values; they are
    recomputed on demand by the getters.
    """

    def __init__(self, test=None, reference=None):
        # cached statistics; None means "not computed yet"
        self.tp = None
        self.fp = None
        self.tn = None
        self.fn = None
        self.size = None
        self.reference_empty = None
        self.reference_full = None
        self.test_empty = None
        self.test_full = None
        self.set_reference(reference)
        self.set_test(test)

    def set_test(self, test):
        """Store a new test segmentation and invalidate the cache."""
        self.test = test
        self.reset()

    def set_reference(self, reference):
        """Store a new reference segmentation and invalidate the cache."""
        self.reference = reference
        self.reset()

    def reset(self):
        """Invalidate all cached statistics."""
        self.tp = None
        self.fp = None
        self.tn = None
        self.fn = None
        self.size = None
        self.test_empty = None
        self.test_full = None
        self.reference_empty = None
        self.reference_full = None

    def compute(self):
        """Compute all statistics from the stored segmentations."""
        if self.test is None or self.reference is None:
            raise ValueError("'test' and 'reference' must both be set to compute confusion matrix.")

        assert self.test.shape == self.reference.shape, "Shape mismatch: {} and {}".format(
            self.test.shape, self.reference.shape)

        test_pos = (self.test != 0)
        ref_pos = (self.reference != 0)
        self.tp = int((test_pos & ref_pos).sum())
        self.fp = int((test_pos & ~ref_pos).sum())
        self.tn = int((~test_pos & ~ref_pos).sum())
        self.fn = int((~test_pos & ref_pos).sum())
        self.size = int(np.prod(self.reference.shape, dtype=np.int64))
        self.test_empty = not np.any(self.test)
        self.test_full = np.all(self.test)
        self.reference_empty = not np.any(self.reference)
        self.reference_full = np.all(self.reference)

    def get_matrix(self):
        """Return (tp, fp, tn, fn), computing them first if needed."""
        if any(entry is None for entry in (self.tp, self.fp, self.tn, self.fn)):
            self.compute()
        return self.tp, self.fp, self.tn, self.fn

    def get_size(self):
        """Return the total number of voxels in the reference."""
        if self.size is None:
            self.compute()
        return self.size

    def get_existence(self):
        """Return (test_empty, test_full, reference_empty, reference_full)."""
        flags = (self.test_empty, self.test_full, self.reference_empty, self.reference_full)
        if any(flag is None for flag in flags):
            self.compute()
        return self.test_empty, self.test_full, self.reference_empty, self.reference_full
103
+
104
+
105
def dice(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """2TP / (2TP + FP + FN)"""

    cm = confusion_matrix if confusion_matrix is not None else ConfusionMatrix(test, reference)

    tp, fp, _, fn = cm.get_matrix()
    test_empty, _, reference_empty, _ = cm.get_existence()

    # both segmentations empty: Dice is undefined
    if test_empty and reference_empty:
        return float("NaN") if nan_for_nonexisting else 0.

    return float(2. * tp / (2 * tp + fp + fn))
121
+
122
+
123
def jaccard(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """TP / (TP + FP + FN)"""

    cm = confusion_matrix if confusion_matrix is not None else ConfusionMatrix(test, reference)

    tp, fp, _, fn = cm.get_matrix()
    test_empty, _, reference_empty, _ = cm.get_existence()

    # both segmentations empty: Jaccard is undefined
    if test_empty and reference_empty:
        return float("NaN") if nan_for_nonexisting else 0.

    return float(tp / (tp + fp + fn))
139
+
140
+
141
def precision(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """TP / (TP + FP)"""

    cm = confusion_matrix if confusion_matrix is not None else ConfusionMatrix(test, reference)

    tp, fp, _, _ = cm.get_matrix()
    test_empty, _, _, _ = cm.get_existence()

    # empty test segmentation: no positive predictions, precision undefined
    if test_empty:
        return float("NaN") if nan_for_nonexisting else 0.

    return float(tp / (tp + fp))
157
+
158
+
159
def sensitivity(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """TP / (TP + FN)"""

    cm = confusion_matrix if confusion_matrix is not None else ConfusionMatrix(test, reference)

    tp, _, _, fn = cm.get_matrix()
    _, _, reference_empty, _ = cm.get_existence()

    # empty reference: no positives exist, sensitivity undefined
    if reference_empty:
        return float("NaN") if nan_for_nonexisting else 0.

    return float(tp / (tp + fn))
175
+
176
+
177
def recall(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """Alias for sensitivity: TP / (TP + FN)."""

    return sensitivity(test=test, reference=reference, confusion_matrix=confusion_matrix,
                       nan_for_nonexisting=nan_for_nonexisting, **kwargs)
181
+
182
+
183
def specificity(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """True negative rate: TN / (TN + FP).

    Undefined when the reference segmentation is entirely foreground;
    returns NaN in that case (or 0. when nan_for_nonexisting is False).
    """

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    _, fp, tn, _ = confusion_matrix.get_matrix()
    reference_full = confusion_matrix.get_existence()[3]

    # All-foreground reference -> TN + FP == 0, metric undefined.
    if reference_full:
        return float("NaN") if nan_for_nonexisting else 0.

    return float(tn / (tn + fp))
199
+
200
+
201
def accuracy(test=None, reference=None, confusion_matrix=None, **kwargs):
    """Fraction of correctly classified voxels: (TP + TN) / (TP + FP + FN + TN)."""

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    tp, fp, tn, fn = confusion_matrix.get_matrix()
    correct = tp + tn
    total = tp + fp + tn + fn

    return float(correct / total)
210
+
211
+
212
def fscore(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, beta=1., **kwargs):
    """F-beta score: (1 + b^2) * TP / ((1 + b^2) * TP + b^2 * FN + FP).

    Computed from precision and recall. Fix over the original: when both
    precision and recall are 0 (tp == 0 with fp > 0 and fn > 0) the
    denominator is 0 and the original raised ZeroDivisionError; now the
    undefined case honors nan_for_nonexisting like the other metrics
    (NaN, or 0. when nan_for_nonexisting is False). NaN precision/recall
    still propagates to a NaN result, as before.
    """

    precision_ = precision(test, reference, confusion_matrix, nan_for_nonexisting)
    recall_ = recall(test, reference, confusion_matrix, nan_for_nonexisting)

    denominator = (beta * beta * precision_) + recall_
    if denominator == 0:
        # Both precision and recall are exactly 0 -> score undefined.
        return float("NaN") if nan_for_nonexisting else 0.

    return (1 + beta * beta) * precision_ * recall_ / denominator
220
+
221
+
222
def false_positive_rate(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """Fall-out: FP / (FP + TN), computed as the complement of specificity."""

    spec = specificity(test, reference, confusion_matrix, nan_for_nonexisting)
    return 1 - spec
226
+
227
+
228
def false_omission_rate(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """FN / (TN + FN).

    Undefined when the test segmentation is entirely foreground; returns
    NaN in that case (or 0. when nan_for_nonexisting is False).
    """

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    _, _, tn, fn = confusion_matrix.get_matrix()
    test_full = confusion_matrix.get_existence()[1]

    # All-foreground test segmentation -> FN + TN == 0, metric undefined.
    if test_full:
        return float("NaN") if nan_for_nonexisting else 0.

    return float(fn / (fn + tn))
244
+
245
+
246
def false_negative_rate(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """Miss rate: FN / (TP + FN), computed as the complement of sensitivity."""

    sens = sensitivity(test, reference, confusion_matrix, nan_for_nonexisting)
    return 1 - sens
250
+
251
+
252
def true_negative_rate(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """Alias for specificity: TN / (TN + FP)."""

    return specificity(test=test, reference=reference, confusion_matrix=confusion_matrix,
                       nan_for_nonexisting=nan_for_nonexisting)
256
+
257
+
258
def false_discovery_rate(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """FP / (TP + FP), computed as the complement of precision."""

    prec = precision(test, reference, confusion_matrix, nan_for_nonexisting)
    return 1 - prec
262
+
263
+
264
def negative_predictive_value(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, **kwargs):
    """TN / (TN + FN), computed as the complement of the false omission rate."""

    fomr = false_omission_rate(test, reference, confusion_matrix, nan_for_nonexisting)
    return 1 - fomr
268
+
269
+
270
def total_positives_test(test=None, reference=None, confusion_matrix=None, **kwargs):
    """Number of foreground voxels in the test segmentation: TP + FP."""

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    tp, fp, _, _ = confusion_matrix.get_matrix()

    return tp + fp
279
+
280
+
281
def total_negatives_test(test=None, reference=None, confusion_matrix=None, **kwargs):
    """Number of background voxels in the test segmentation: TN + FN."""

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    _, _, tn, fn = confusion_matrix.get_matrix()

    return tn + fn
290
+
291
+
292
def total_positives_reference(test=None, reference=None, confusion_matrix=None, **kwargs):
    """Number of foreground voxels in the reference segmentation: TP + FN."""

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    tp, _, _, fn = confusion_matrix.get_matrix()

    return tp + fn
301
+
302
+
303
def total_negatives_reference(test=None, reference=None, confusion_matrix=None, **kwargs):
    """Number of background voxels in the reference segmentation: TN + FP."""

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    _, fp, tn, _ = confusion_matrix.get_matrix()

    return tn + fp
312
+
313
+
314
def hausdorff_distance(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, voxel_spacing=None, connectivity=1, **kwargs):
    """Symmetric Hausdorff distance between the two segmentation surfaces.

    Delegates to medpy's ``metric.hd``. Surface distances are undefined if
    either array is all-background or all-foreground; returns NaN then
    (or 0 when nan_for_nonexisting is False).
    """

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    test_empty, test_full, reference_empty, reference_full = confusion_matrix.get_existence()

    # Degenerate masks have no surface to measure against.
    if test_empty or test_full or reference_empty or reference_full:
        return float("NaN") if nan_for_nonexisting else 0

    return metric.hd(confusion_matrix.test, confusion_matrix.reference, voxel_spacing, connectivity)
330
+
331
+
332
def hausdorff_distance_95(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, voxel_spacing=None, connectivity=1, **kwargs):
    """95th-percentile Hausdorff distance between the segmentation surfaces.

    Delegates to medpy's ``metric.hd95``. Undefined if either array is
    all-background or all-foreground; returns NaN then (or 0 when
    nan_for_nonexisting is False).
    """

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    test_empty, test_full, reference_empty, reference_full = confusion_matrix.get_existence()

    # Degenerate masks have no surface to measure against.
    if test_empty or test_full or reference_empty or reference_full:
        return float("NaN") if nan_for_nonexisting else 0

    return metric.hd95(confusion_matrix.test, confusion_matrix.reference, voxel_spacing, connectivity)
348
+
349
+
350
def avg_surface_distance(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, voxel_spacing=None, connectivity=1, **kwargs):
    """Average (one-sided) surface distance from test to reference.

    Delegates to medpy's ``metric.asd``. Undefined if either array is
    all-background or all-foreground; returns NaN then (or 0 when
    nan_for_nonexisting is False).
    """

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    test_empty, test_full, reference_empty, reference_full = confusion_matrix.get_existence()

    # Degenerate masks have no surface to measure against.
    if test_empty or test_full or reference_empty or reference_full:
        return float("NaN") if nan_for_nonexisting else 0

    return metric.asd(confusion_matrix.test, confusion_matrix.reference, voxel_spacing, connectivity)
366
+
367
+
368
def avg_surface_distance_symmetric(test=None, reference=None, confusion_matrix=None, nan_for_nonexisting=True, voxel_spacing=None, connectivity=1, **kwargs):
    """Average symmetric surface distance between test and reference.

    Delegates to medpy's ``metric.assd``. Undefined if either array is
    all-background or all-foreground; returns NaN then (or 0 when
    nan_for_nonexisting is False).
    """

    if confusion_matrix is None:
        confusion_matrix = ConfusionMatrix(test, reference)

    test_empty, test_full, reference_empty, reference_full = confusion_matrix.get_existence()

    # Degenerate masks have no surface to measure against.
    if test_empty or test_full or reference_empty or reference_full:
        return float("NaN") if nan_for_nonexisting else 0

    return metric.assd(confusion_matrix.test, confusion_matrix.reference, voxel_spacing, connectivity)
384
+
385
+
386
# Registry of all available evaluation metrics, keyed by display name.
# NOTE(review): the final key uses a lowercase "t" ("total Negatives
# Reference"), inconsistent with every other key; left unchanged because
# callers may look the metric up by this exact string.
ALL_METRICS = {
    "False Positive Rate": false_positive_rate,
    "Dice": dice,
    "Jaccard": jaccard,
    "Hausdorff Distance": hausdorff_distance,
    "Hausdorff Distance 95": hausdorff_distance_95,
    "Precision": precision,
    "Recall": recall,
    "Avg. Symmetric Surface Distance": avg_surface_distance_symmetric,
    "Avg. Surface Distance": avg_surface_distance,
    "Accuracy": accuracy,
    "False Omission Rate": false_omission_rate,
    "Negative Predictive Value": negative_predictive_value,
    "False Negative Rate": false_negative_rate,
    "True Negative Rate": true_negative_rate,
    "False Discovery Rate": false_discovery_rate,
    "Total Positives Test": total_positives_test,
    "Total Negatives Test": total_negatives_test,
    "Total Positives Reference": total_positives_reference,
    "total Negatives Reference": total_negatives_reference
}