Venn committed
Commit 02aa18d · verified · 1 Parent(s): 594437e

Upload maisi_ct_generative version 1.0.0

LICENSE ADDED
@@ -0,0 +1,247 @@
+ Code License
+
+ This license applies to all files except the model weights in the directory.
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ ------------------------------------------------------------------------------
+
+ Model Weights License
+
+ This license applies to model weights in the directory.
+
+ NVIDIA License
+
+ 1. Definitions
+
+ “Licensor” means any person or entity that distributes its Work.
+ “Work” means (a) the original work of authorship made available under this license, which may include software, documentation, or other files, and (b) any additions to or derivative works thereof that are made available under this license.
+ The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this license, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work.
+ Works are “made available” under this license by including in or with the Work either (a) a copyright notice referencing the applicability of this license to the Work, or (b) a copy of this license.
+
+ 2. License Grant
+
+ 2.1 Copyright Grant. Subject to the terms and conditions of this license, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to use, reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form.
+
+ 3. Limitations
+
+ 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this license, (b) you include a complete copy of this license with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work.
+
+ 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this license (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself.
+
+ 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA Corporation and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
+
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this license from such Licensor (including the grant in Section 2.1) will terminate immediately.
+
+ 3.5 Trademarks. This license does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this license.
+
+ 3.6 Termination. If you violate any term of this license, then your rights under this license (including the grant in Section 2.1) will terminate immediately.
+
+ 4. Disclaimer of Warranty.
+
+ THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.
+
+ 5. Limitation of Liability.
+
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
configs/all_anatomy_size_condtions.json ADDED
The diff for this file is too large to render. See raw diff
 
configs/candidate_masks_flexible_size_and_spacing_3000.json ADDED
The diff for this file is too large to render. See raw diff
 
configs/image_median_statistics.json ADDED
@@ -0,0 +1,72 @@
+ {
+ "liver": {
+ "min_median": -14.0,
+ "max_median": 1000.0,
+ "percentile_0_5": 9.530000000000001,
+ "percentile_99_5": 162.0,
+ "sigma_6_low": -21.596463547885904,
+ "sigma_6_high": 156.27881534763367,
+ "sigma_12_low": -110.53410299564568,
+ "sigma_12_high": 245.21645479539342
+ },
+ "spleen": {
+ "min_median": -69.0,
+ "max_median": 1000.0,
+ "percentile_0_5": 16.925000000000004,
+ "percentile_99_5": 184.07500000000073,
+ "sigma_6_low": -43.133891656525165,
+ "sigma_6_high": 177.40494997185993,
+ "sigma_12_low": -153.4033124707177,
+ "sigma_12_high": 287.6743707860525
+ },
+ "pancreas": {
+ "min_median": -124.0,
+ "max_median": 1000.0,
+ "percentile_0_5": -29.0,
+ "percentile_99_5": 145.92000000000007,
+ "sigma_6_low": -56.59382515620725,
+ "sigma_6_high": 149.50627399318438,
+ "sigma_12_low": -159.64387473090306,
+ "sigma_12_high": 252.5563235678802
+ },
+ "kidney": {
+ "min_median": -165.5,
+ "max_median": 819.0,
+ "percentile_0_5": -40.0,
+ "percentile_99_5": 254.61999999999898,
+ "sigma_6_low": -130.56375604853028,
+ "sigma_6_high": 267.28163511081016,
+ "sigma_12_low": -329.4864516282005,
+ "sigma_12_high": 466.20433069048045
+ },
+ "lung": {
+ "min_median": -1000.0,
+ "max_median": 65.0,
+ "percentile_0_5": -937.0,
+ "percentile_99_5": -366.9500000000007,
+ "sigma_6_low": -1088.5583843889117,
+ "sigma_6_high": -551.8503346949108,
+ "sigma_12_low": -1356.912409235912,
+ "sigma_12_high": -283.4963098479103
+ },
+ "bone": {
+ "min_median": 77.5,
+ "max_median": 1000.0,
+ "percentile_0_5": 136.45499999999998,
+ "percentile_99_5": 551.6350000000002,
+ "sigma_6_low": 71.39901958080469,
+ "sigma_6_high": 471.9957615639765,
+ "sigma_12_low": -128.8993514107812,
+ "sigma_12_high": 672.2941325555623
+ },
+ "brain": {
+ "min_median": -1000.0,
+ "max_median": 238.0,
+ "percentile_0_5": -951.0,
+ "percentile_99_5": 126.25,
+ "sigma_6_low": -304.8208236135867,
+ "sigma_6_high": 369.5118535139189,
+ "sigma_12_low": -641.9871621773394,
+ "sigma_12_high": 706.6781920776717
+ }
+ }
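Note: these per-organ intensity statistics (medians in HU over real CT scans, with 0.5/99.5 percentile and ±6σ/±12σ bands) are referenced by `configs/inference.json` below as `real_img_median_statistics_file`, which the sampler consumes for its image quality check. As a minimal illustration of how such a file can be used — a hedged sketch, not the bundle's own quality-check code — one could flag a generated volume whose organ-wise median falls outside the recorded percentile band:

```python
import json

import numpy as np

with open("configs/image_median_statistics.json") as f:
    stats = json.load(f)


def organ_median_in_band(image: np.ndarray, organ_mask: np.ndarray, organ: str) -> bool:
    """Check the median HU inside `organ_mask` against the 0.5-99.5 percentile
    band recorded for `organ` (e.g. "liver") in the statistics file."""
    median = float(np.median(image[organ_mask > 0]))
    band = stats[organ]
    return band["percentile_0_5"] <= median <= band["percentile_99_5"]
```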
configs/inference.json ADDED
@@ -0,0 +1,312 @@
+ {
+ "imports": [
+ "$import torch",
+ "$from pathlib import Path",
+ "$import scripts"
+ ],
+ "bundle_root": ".",
+ "model_dir": "$@bundle_root + '/models'",
+ "output_dir": "$@bundle_root + '/output'",
+ "create_output_dir": "$Path(@output_dir).mkdir(exist_ok=True)",
+ "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
+ "trained_autoencoder_path": "$@model_dir + '/autoencoder.pt'",
+ "trained_diffusion_path": "$@model_dir + '/diffusion_unet.pt'",
+ "trained_controlnet_path": "$@model_dir + '/controlnet.pt'",
+ "trained_mask_generation_autoencoder_path": "$@model_dir + '/mask_generation_autoencoder.pt'",
+ "trained_mask_generation_diffusion_path": "$@model_dir + '/mask_generation_diffusion_unet.pt'",
+ "all_mask_files_base_dir": "$@bundle_root + '/datasets/all_masks_flexible_size_and_spacing_3000'",
+ "all_mask_files_json": "$@bundle_root + '/configs/candidate_masks_flexible_size_and_spacing_3000.json'",
+ "all_anatomy_size_condtions_json": "$@bundle_root + '/configs/all_anatomy_size_condtions.json'",
+ "label_dict_json": "$@bundle_root + '/configs/label_dict.json'",
+ "label_dict_remap_json": "$@bundle_root + '/configs/label_dict_124_to_132.json'",
+ "real_img_median_statistics_file": "$@bundle_root + '/configs/image_median_statistics.json'",
+ "num_output_samples": 1,
+ "body_region": [],
+ "anatomy_list": [
+ "liver"
+ ],
+ "modality": "ct",
+ "controllable_anatomy_size": [],
+ "num_inference_steps": 30,
+ "mask_generation_num_inference_steps": 1000,
+ "random_seed": null,
+ "spatial_dims": 3,
+ "image_channels": 1,
+ "latent_channels": 4,
+ "output_size_xy": 512,
+ "output_size_z": 512,
+ "output_size": [
+ "@output_size_xy",
+ "@output_size_xy",
+ "@output_size_z"
+ ],
+ "image_output_ext": ".nii.gz",
+ "label_output_ext": ".nii.gz",
+ "spacing_xy": 1.0,
+ "spacing_z": 1.0,
+ "spacing": [
+ "@spacing_xy",
+ "@spacing_xy",
+ "@spacing_z"
+ ],
+ "latent_shape": [
+ "@latent_channels",
+ "$@output_size[0]//4",
+ "$@output_size[1]//4",
+ "$@output_size[2]//4"
+ ],
+ "mask_generation_latent_shape": [
+ 4,
+ 64,
+ 64,
+ 64
+ ],
+ "autoencoder_sliding_window_infer_size": [
+ 80,
+ 80,
+ 80
+ ],
+ "autoencoder_sliding_window_infer_overlap": 0.4,
+ "autoencoder_def": {
+ "_target_": "monai.apps.generation.maisi.networks.autoencoderkl_maisi.AutoencoderKlMaisi",
+ "spatial_dims": "@spatial_dims",
+ "in_channels": "@image_channels",
+ "out_channels": "@image_channels",
+ "latent_channels": "@latent_channels",
+ "num_channels": [
+ 64,
+ 128,
+ 256
+ ],
+ "num_res_blocks": [
+ 2,
+ 2,
+ 2
+ ],
+ "norm_num_groups": 32,
+ "norm_eps": 1e-06,
+ "attention_levels": [
+ false,
+ false,
+ false
+ ],
+ "with_encoder_nonlocal_attn": false,
+ "with_decoder_nonlocal_attn": false,
+ "use_checkpointing": false,
+ "use_convtranspose": false,
+ "norm_float16": true,
+ "num_splits": 2,
+ "dim_split": 1
+ },
+ "diffusion_unet_def": {
+ "_target_": "monai.apps.generation.maisi.networks.diffusion_model_unet_maisi.DiffusionModelUNetMaisi",
+ "spatial_dims": "@spatial_dims",
+ "in_channels": "@latent_channels",
+ "out_channels": "@latent_channels",
+ "num_channels": [
+ 64,
+ 128,
+ 256,
+ 512
+ ],
+ "attention_levels": [
+ false,
+ false,
+ true,
+ true
+ ],
+ "num_head_channels": [
+ 0,
+ 0,
+ 32,
+ 32
+ ],
+ "num_res_blocks": 2,
+ "use_flash_attention": true,
+ "include_top_region_index_input": false,
+ "include_bottom_region_index_input": false,
+ "include_spacing_input": true,
+ "num_class_embeds": 128,
+ "resblock_updown": true,
+ "include_fc": true
+ },
+ "controlnet_def": {
+ "_target_": "monai.apps.generation.maisi.networks.controlnet_maisi.ControlNetMaisi",
+ "spatial_dims": "@spatial_dims",
+ "in_channels": "@latent_channels",
+ "num_channels": [
+ 64,
+ 128,
+ 256,
+ 512
+ ],
+ "attention_levels": [
+ false,
+ false,
+ true,
+ true
+ ],
+ "num_head_channels": [
+ 0,
+ 0,
+ 32,
+ 32
+ ],
+ "num_res_blocks": 2,
+ "use_flash_attention": true,
+ "conditioning_embedding_in_channels": 8,
+ "conditioning_embedding_num_channels": [
+ 8,
+ 32,
+ 64
+ ],
+ "num_class_embeds": 128,
+ "resblock_updown": true,
+ "include_fc": true
+ },
+ "mask_generation_autoencoder_def": {
+ "_target_": "monai.apps.generation.maisi.networks.autoencoderkl_maisi.AutoencoderKlMaisi",
+ "spatial_dims": "@spatial_dims",
+ "in_channels": 8,
+ "out_channels": 125,
+ "latent_channels": "@latent_channels",
+ "num_channels": [
+ 32,
+ 64,
+ 128
+ ],
+ "num_res_blocks": [
+ 1,
+ 2,
+ 2
+ ],
+ "norm_num_groups": 32,
+ "norm_eps": 1e-06,
+ "attention_levels": [
+ false,
+ false,
+ false
+ ],
+ "with_encoder_nonlocal_attn": false,
+ "with_decoder_nonlocal_attn": false,
+ "use_flash_attention": false,
+ "use_checkpointing": true,
+ "use_convtranspose": true,
+ "norm_float16": true,
+ "num_splits": 8,
+ "dim_split": 1
+ },
+ "mask_generation_diffusion_def": {
+ "_target_": "monai.networks.nets.diffusion_model_unet.DiffusionModelUNet",
+ "spatial_dims": "@spatial_dims",
+ "in_channels": "@latent_channels",
+ "out_channels": "@latent_channels",
+ "channels": [
+ 64,
+ 128,
+ 256,
+ 512
+ ],
+ "attention_levels": [
+ false,
+ false,
+ true,
+ true
+ ],
+ "num_head_channels": [
+ 0,
+ 0,
+ 32,
+ 32
+ ],
+ "num_res_blocks": 2,
+ "use_flash_attention": true,
+ "with_conditioning": true,
+ "upcast_attention": true,
+ "cross_attention_dim": 10
+ },
+ "autoencoder": "$@autoencoder_def.to(@device)",
+ "checkpoint_autoencoder": "$torch.load(@trained_autoencoder_path, weights_only=True)",
+ "load_autoencoder": "[email protected]_state_dict(@checkpoint_autoencoder)",
+ "diffusion_unet": "$@diffusion_unet_def.to(@device)",
+ "checkpoint_diffusion_unet": "$torch.load(@trained_diffusion_path, weights_only=False)",
+ "load_diffusion": "$@diffusion_unet.load_state_dict(@checkpoint_diffusion_unet['unet_state_dict'])",
+ "controlnet": "$@controlnet_def.to(@device)",
+ "copy_controlnet_state": "$monai.networks.utils.copy_model_state(@controlnet, @diffusion_unet.state_dict())",
+ "checkpoint_controlnet": "$torch.load(@trained_controlnet_path, weights_only=False)",
+ "load_controlnet": "[email protected]_state_dict(@checkpoint_controlnet['controlnet_state_dict'], strict=True)",
+ "scale_factor": "$@checkpoint_diffusion_unet['scale_factor'].to(@device)",
+ "mask_generation_autoencoder": "$@mask_generation_autoencoder_def.to(@device)",
+ "checkpoint_mask_generation_autoencoder": "$torch.load(@trained_mask_generation_autoencoder_path, weights_only=True)",
+ "load_mask_generation_autoencoder": "$@mask_generation_autoencoder.load_state_dict(@checkpoint_mask_generation_autoencoder, strict=True)",
+ "mask_generation_diffusion_unet": "$@mask_generation_diffusion_def.to(@device)",
+ "checkpoint_mask_generation_diffusion_unet": "$torch.load(@trained_mask_generation_diffusion_path, weights_only=True)",
+ "load_mask_generation_diffusion": "$@mask_generation_diffusion_unet.load_state_dict(@checkpoint_mask_generation_diffusion_unet['unet_state_dict'], strict=True)",
+ "mask_generation_scale_factor": "$@checkpoint_mask_generation_diffusion_unet['scale_factor']",
+ "noise_scheduler": {
+ "_target_": "scripts.rectified_flow.RFlowScheduler",
+ "num_train_timesteps": 1000,
+ "use_discrete_timesteps": false,
+ "use_timestep_transform": true,
+ "sample_method": "uniform"
+ },
+ "mask_generation_noise_scheduler": {
+ "_target_": "monai.networks.schedulers.ddpm.DDPMScheduler",
+ "num_train_timesteps": 1000,
+ "beta_start": 0.0015,
+ "beta_end": 0.0195,
+ "schedule": "scaled_linear_beta",
+ "clip_sample": false
+ },
+ "check_input": "$scripts.sample.check_input(@body_region,@anatomy_list,@label_dict_json,@output_size,@spacing,@controllable_anatomy_size)",
+ "ldm_sampler": {
+ "_target_": "scripts.sample.LDMSampler",
+ "_requires_": [
+ "@create_output_dir",
+ "@load_diffusion",
+ "@load_autoencoder",
+ "@copy_controlnet_state",
+ "@load_controlnet",
+ "@load_mask_generation_autoencoder",
+ "@load_mask_generation_diffusion",
+ "@check_input"
+ ],
+ "body_region": "@body_region",
+ "anatomy_list": "@anatomy_list",
+ "modality": "@modality",
+ "all_mask_files_json": "@all_mask_files_json",
+ "all_anatomy_size_condtions_json": "@all_anatomy_size_condtions_json",
+ "all_mask_files_base_dir": "@all_mask_files_base_dir",
+ "label_dict_json": "@label_dict_json",
+ "label_dict_remap_json": "@label_dict_remap_json",
+ "autoencoder": "@autoencoder",
+ "diffusion_unet": "@diffusion_unet",
+ "controlnet": "@controlnet",
+ "scale_factor": "@scale_factor",
+ "noise_scheduler": "@noise_scheduler",
+ "mask_generation_autoencoder": "@mask_generation_autoencoder",
+ "mask_generation_diffusion_unet": "@mask_generation_diffusion_unet",
+ "mask_generation_scale_factor": "@mask_generation_scale_factor",
+ "mask_generation_noise_scheduler": "@mask_generation_noise_scheduler",
+ "controllable_anatomy_size": "@controllable_anatomy_size",
+ "image_output_ext": "@image_output_ext",
+ "label_output_ext": "@label_output_ext",
+ "real_img_median_statistics": "@real_img_median_statistics_file",
+ "device": "@device",
+ "latent_shape": "@latent_shape",
+ "mask_generation_latent_shape": "@mask_generation_latent_shape",
+ "output_size": "@output_size",
+ "spacing": "@spacing",
+ "output_dir": "@output_dir",
+ "num_inference_steps": "@num_inference_steps",
+ "mask_generation_num_inference_steps": "@mask_generation_num_inference_steps",
+ "random_seed": "@random_seed",
+ "autoencoder_sliding_window_infer_size": "@autoencoder_sliding_window_infer_size",
+ "autoencoder_sliding_window_infer_overlap": "@autoencoder_sliding_window_infer_overlap"
+ },
+ "run": [
+ "$monai.utils.set_determinism(seed=@random_seed)",
+ "$@ldm_sampler.sample_multiple_images(@num_output_samples)"
+ ],
+ "evaluator": null
+ }
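This config follows the MONAI bundle convention: `@name` strings reference other config entries, `$...` strings are evaluated as Python, `_target_` dictionaries are instantiated, and the expressions under `run` are executed last. A minimal sketch of launching it through the bundle runner (the keyword overrides shown are illustrative, not required):

```python
from monai.bundle import run

# Parses the config, instantiates the networks, loads the checkpoints listed
# under "_requires_", then evaluates the "run" expressions, i.e.
# set_determinism(...) followed by LDMSampler.sample_multiple_images(...).
run(
    config_file="configs/inference.json",
    logging_file="configs/logging.conf",
    meta_file="configs/metadata.json",
    num_output_samples=1,              # keyword arguments override config entries
    anatomy_list=["liver", "spleen"],  # illustrative override
)
```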
configs/inference_trt.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "+imports": [
+ "$from monai.networks import trt_compile"
+ ],
+ "c_trt_args": {
+ "export_args": {
+ "dynamo": "$False",
+ "report": "$True"
+ },
+ "output_lists": [
+ [
+ -1
+ ],
+ []
+ ]
+ },
+ "controlnet": "$trt_compile(@controlnet_def.to(@device), @trained_controlnet_path, @c_trt_args)",
+ "diffusion_unet": "$trt_compile(@diffusion_unet_def.to(@device), @trained_diffusion_path)"
+ }
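The `+imports` key appends to the base config's `imports` list, and the `controlnet`/`diffusion_unet` entries override the definitions in `inference.json`, so this file is meant to be layered on top of the base inference config rather than run alone. A sketch, assuming both configs are passed together in order:

```python
from monai.bundle import run

# Later config files override earlier ones, so the TensorRT-compiled
# controlnet and diffusion_unet replace the plain PyTorch modules while
# the rest of the sampling pipeline stays unchanged.
run(
    config_file=["configs/inference.json", "configs/inference_trt.json"],
    logging_file="configs/logging.conf",
)
```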
configs/integration_test_masks.json ADDED
@@ -0,0 +1,98 @@
+ [
+ {
+ "bottom_region_index": [
+ 0,
+ 0,
+ 0,
+ 1
+ ],
+ "dim": [
+ 512,
+ 512,
+ 512
+ ],
+ "label_list": [
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 17,
+ 19,
+ 25,
+ 28,
+ 29,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 69,
+ 70,
+ 71,
+ 72,
+ 73,
+ 74,
+ 81,
+ 82,
+ 83,
+ 84,
+ 85,
+ 86,
+ 93,
+ 94,
+ 95,
+ 96,
+ 97,
+ 98,
+ 99,
+ 100,
+ 101,
+ 102,
+ 103,
+ 104,
+ 105,
+ 106,
+ 107,
+ 114,
+ 115,
+ 118,
+ 121,
+ 122,
+ 127
+ ],
+ "pseudo_label_filename": "./IntegrationTest-AbdomenCT.nii.gz",
+ "spacing": [
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ "top_region_index": [
+ 0,
+ 1,
+ 0,
+ 0
+ ]
+ }
+ ]
configs/label_dict.json ADDED
@@ -0,0 +1,134 @@
+ {
+ "liver": 1,
+ "dummy1": 2,
+ "spleen": 3,
+ "pancreas": 4,
+ "right kidney": 5,
+ "aorta": 6,
+ "inferior vena cava": 7,
+ "right adrenal gland": 8,
+ "left adrenal gland": 9,
+ "gallbladder": 10,
+ "esophagus": 11,
+ "stomach": 12,
+ "duodenum": 13,
+ "left kidney": 14,
+ "bladder": 15,
+ "dummy2": 16,
+ "portal vein and splenic vein": 17,
+ "dummy3": 18,
+ "small bowel": 19,
+ "dummy4": 20,
+ "dummy5": 21,
+ "brain": 22,
+ "lung tumor": 23,
+ "pancreatic tumor": 24,
+ "hepatic vessel": 25,
+ "hepatic tumor": 26,
+ "colon cancer primaries": 27,
+ "left lung upper lobe": 28,
+ "left lung lower lobe": 29,
+ "right lung upper lobe": 30,
+ "right lung middle lobe": 31,
+ "right lung lower lobe": 32,
+ "vertebrae L5": 33,
+ "vertebrae L4": 34,
+ "vertebrae L3": 35,
+ "vertebrae L2": 36,
+ "vertebrae L1": 37,
+ "vertebrae T12": 38,
+ "vertebrae T11": 39,
+ "vertebrae T10": 40,
+ "vertebrae T9": 41,
+ "vertebrae T8": 42,
+ "vertebrae T7": 43,
+ "vertebrae T6": 44,
+ "vertebrae T5": 45,
+ "vertebrae T4": 46,
+ "vertebrae T3": 47,
+ "vertebrae T2": 48,
+ "vertebrae T1": 49,
+ "vertebrae C7": 50,
+ "vertebrae C6": 51,
+ "vertebrae C5": 52,
+ "vertebrae C4": 53,
+ "vertebrae C3": 54,
+ "vertebrae C2": 55,
+ "vertebrae C1": 56,
+ "trachea": 57,
+ "left iliac artery": 58,
+ "right iliac artery": 59,
+ "left iliac vena": 60,
+ "right iliac vena": 61,
+ "colon": 62,
+ "left rib 1": 63,
+ "left rib 2": 64,
+ "left rib 3": 65,
+ "left rib 4": 66,
+ "left rib 5": 67,
+ "left rib 6": 68,
+ "left rib 7": 69,
+ "left rib 8": 70,
+ "left rib 9": 71,
+ "left rib 10": 72,
+ "left rib 11": 73,
+ "left rib 12": 74,
+ "right rib 1": 75,
+ "right rib 2": 76,
+ "right rib 3": 77,
+ "right rib 4": 78,
+ "right rib 5": 79,
+ "right rib 6": 80,
+ "right rib 7": 81,
+ "right rib 8": 82,
+ "right rib 9": 83,
+ "right rib 10": 84,
+ "right rib 11": 85,
+ "right rib 12": 86,
+ "left humerus": 87,
+ "right humerus": 88,
+ "left scapula": 89,
+ "right scapula": 90,
+ "left clavicula": 91,
+ "right clavicula": 92,
+ "left femur": 93,
+ "right femur": 94,
+ "left hip": 95,
+ "right hip": 96,
+ "sacrum": 97,
+ "left gluteus maximus": 98,
+ "right gluteus maximus": 99,
+ "left gluteus medius": 100,
+ "right gluteus medius": 101,
+ "left gluteus minimus": 102,
+ "right gluteus minimus": 103,
+ "left autochthon": 104,
+ "right autochthon": 105,
+ "left iliopsoas": 106,
+ "right iliopsoas": 107,
+ "left atrial appendage": 108,
+ "brachiocephalic trunk": 109,
+ "left brachiocephalic vein": 110,
+ "right brachiocephalic vein": 111,
+ "left common carotid artery": 112,
+ "right common carotid artery": 113,
+ "costal cartilages": 114,
+ "heart": 115,
+ "left kidney cyst": 116,
+ "right kidney cyst": 117,
+ "prostate": 118,
+ "pulmonary vein": 119,
+ "skull": 120,
+ "spinal cord": 121,
+ "sternum": 122,
+ "left subclavian artery": 123,
+ "right subclavian artery": 124,
+ "superior vena cava": 125,
+ "thyroid gland": 126,
+ "vertebrae S1": 127,
+ "bone lesion": 128,
+ "dummy6": 129,
+ "dummy7": 130,
+ "dummy8": 131,
+ "airway": 132
+ }
configs/label_dict_124_to_132.json ADDED
@@ -0,0 +1,502 @@
+ {
+ "background": [
+ 0,
+ 0
+ ],
+ "liver": [
+ 1,
+ 1
+ ],
+ "spleen": [
+ 2,
+ 3
+ ],
+ "pancreas": [
+ 3,
+ 4
+ ],
+ "right kidney": [
+ 4,
+ 5
+ ],
+ "aorta": [
+ 5,
+ 6
+ ],
+ "inferior vena cava": [
+ 6,
+ 7
+ ],
+ "right adrenal gland": [
+ 7,
+ 8
+ ],
+ "left adrenal gland": [
+ 8,
+ 9
+ ],
+ "gallbladder": [
+ 9,
+ 10
+ ],
+ "esophagus": [
+ 10,
+ 11
+ ],
+ "stomach": [
+ 11,
+ 12
+ ],
+ "duodenum": [
+ 12,
+ 13
+ ],
+ "left kidney": [
+ 13,
+ 14
+ ],
+ "bladder": [
+ 14,
+ 15
+ ],
+ "portal vein and splenic vein": [
+ 15,
+ 17
+ ],
+ "small bowel": [
+ 16,
+ 19
+ ],
+ "brain": [
+ 17,
+ 22
+ ],
+ "lung tumor": [
+ 18,
+ 23
+ ],
+ "pancreatic tumor": [
+ 19,
+ 24
+ ],
+ "hepatic vessel": [
+ 20,
+ 25
+ ],
+ "hepatic tumor": [
+ 21,
+ 26
+ ],
+ "colon cancer primaries": [
+ 22,
+ 27
+ ],
+ "left lung upper lobe": [
+ 23,
+ 28
+ ],
+ "left lung lower lobe": [
+ 24,
+ 29
+ ],
+ "right lung upper lobe": [
+ 25,
+ 30
+ ],
+ "right lung middle lobe": [
+ 26,
+ 31
+ ],
+ "right lung lower lobe": [
+ 27,
+ 32
+ ],
+ "vertebrae L5": [
+ 28,
+ 33
+ ],
+ "vertebrae L4": [
+ 29,
+ 34
+ ],
+ "vertebrae L3": [
+ 30,
+ 35
+ ],
+ "vertebrae L2": [
+ 31,
+ 36
+ ],
+ "vertebrae L1": [
+ 32,
+ 37
+ ],
+ "vertebrae T12": [
+ 33,
+ 38
+ ],
+ "vertebrae T11": [
+ 34,
+ 39
+ ],
+ "vertebrae T10": [
+ 35,
+ 40
+ ],
+ "vertebrae T9": [
+ 36,
+ 41
+ ],
+ "vertebrae T8": [
+ 37,
+ 42
+ ],
+ "vertebrae T7": [
+ 38,
+ 43
+ ],
+ "vertebrae T6": [
+ 39,
+ 44
+ ],
+ "vertebrae T5": [
+ 40,
+ 45
+ ],
+ "vertebrae T4": [
+ 41,
+ 46
+ ],
+ "vertebrae T3": [
+ 42,
+ 47
+ ],
+ "vertebrae T2": [
+ 43,
+ 48
+ ],
+ "vertebrae T1": [
+ 44,
+ 49
+ ],
+ "vertebrae C7": [
+ 45,
+ 50
+ ],
+ "vertebrae C6": [
+ 46,
+ 51
+ ],
+ "vertebrae C5": [
+ 47,
+ 52
+ ],
+ "vertebrae C4": [
+ 48,
+ 53
+ ],
+ "vertebrae C3": [
+ 49,
+ 54
+ ],
+ "vertebrae C2": [
+ 50,
+ 55
+ ],
+ "vertebrae C1": [
+ 51,
+ 56
+ ],
+ "trachea": [
+ 52,
+ 57
+ ],
+ "left iliac artery": [
+ 53,
+ 58
+ ],
+ "right iliac artery": [
+ 54,
+ 59
+ ],
+ "left iliac vena": [
+ 55,
+ 60
+ ],
+ "right iliac vena": [
+ 56,
+ 61
+ ],
+ "colon": [
+ 57,
+ 62
+ ],
+ "left rib 1": [
+ 58,
+ 63
+ ],
+ "left rib 2": [
+ 59,
+ 64
+ ],
+ "left rib 3": [
+ 60,
+ 65
+ ],
+ "left rib 4": [
+ 61,
+ 66
+ ],
+ "left rib 5": [
+ 62,
+ 67
+ ],
+ "left rib 6": [
+ 63,
+ 68
+ ],
+ "left rib 7": [
+ 64,
+ 69
+ ],
+ "left rib 8": [
+ 65,
+ 70
+ ],
+ "left rib 9": [
+ 66,
+ 71
+ ],
+ "left rib 10": [
+ 67,
+ 72
+ ],
+ "left rib 11": [
+ 68,
+ 73
+ ],
+ "left rib 12": [
+ 69,
+ 74
+ ],
+ "right rib 1": [
+ 70,
+ 75
+ ],
+ "right rib 2": [
+ 71,
+ 76
+ ],
+ "right rib 3": [
+ 72,
+ 77
+ ],
+ "right rib 4": [
+ 73,
+ 78
+ ],
+ "right rib 5": [
+ 74,
+ 79
+ ],
+ "right rib 6": [
+ 75,
+ 80
+ ],
+ "right rib 7": [
+ 76,
+ 81
+ ],
+ "right rib 8": [
+ 77,
+ 82
+ ],
+ "right rib 9": [
+ 78,
+ 83
+ ],
+ "right rib 10": [
+ 79,
+ 84
+ ],
+ "right rib 11": [
+ 80,
+ 85
+ ],
+ "right rib 12": [
+ 81,
+ 86
+ ],
+ "left humerus": [
+ 82,
+ 87
+ ],
+ "right humerus": [
+ 83,
+ 88
+ ],
+ "left scapula": [
+ 84,
+ 89
+ ],
+ "right scapula": [
+ 85,
+ 90
+ ],
+ "left clavicula": [
+ 86,
+ 91
+ ],
+ "right clavicula": [
+ 87,
+ 92
+ ],
+ "left femur": [
+ 88,
+ 93
+ ],
+ "right femur": [
+ 89,
+ 94
+ ],
+ "left hip": [
+ 90,
+ 95
+ ],
+ "right hip": [
+ 91,
+ 96
+ ],
+ "sacrum": [
+ 92,
+ 97
+ ],
+ "left gluteus maximus": [
+ 93,
+ 98
+ ],
+ "right gluteus maximus": [
+ 94,
+ 99
+ ],
+ "left gluteus medius": [
+ 95,
+ 100
+ ],
+ "right gluteus medius": [
+ 96,
+ 101
+ ],
+ "left gluteus minimus": [
+ 97,
+ 102
+ ],
+ "right gluteus minimus": [
+ 98,
+ 103
+ ],
+ "left autochthon": [
+ 99,
+ 104
+ ],
+ "right autochthon": [
+ 100,
+ 105
+ ],
+ "left iliopsoas": [
+ 101,
+ 106
+ ],
+ "right iliopsoas": [
+ 102,
+ 107
+ ],
+ "left atrial appendage": [
+ 103,
+ 108
+ ],
+ "brachiocephalic trunk": [
+ 104,
+ 109
+ ],
+ "left brachiocephalic vein": [
+ 105,
+ 110
+ ],
+ "right brachiocephalic vein": [
+ 106,
+ 111
+ ],
+ "left common carotid artery": [
+ 107,
+ 112
+ ],
+ "right common carotid artery": [
+ 108,
+ 113
+ ],
+ "costal cartilages": [
+ 109,
+ 114
+ ],
+ "heart": [
+ 110,
+ 115
+ ],
+ "prostate": [
+ 111,
+ 118
+ ],
+ "pulmonary vein": [
+ 112,
+ 119
+ ],
+ "skull": [
+ 113,
+ 120
+ ],
+ "spinal cord": [
+ 114,
+ 121
+ ],
+ "sternum": [
+ 115,
+ 122
+ ],
+ "left subclavian artery": [
+ 116,
+ 123
+ ],
+ "right subclavian artery": [
+ 117,
+ 124
+ ],
+ "superior vena cava": [
+ 118,
+ 125
+ ],
+ "thyroid gland": [
+ 119,
+ 126
+ ],
+ "vertebrae S1": [
+ 120,
+ 127
+ ],
+ "bone lesion": [
+ 121,
+ 128
+ ],
+ "kidney mass": [
+ 122,
+ 129
+ ],
+ "airway": [
+ 123,
+ 132
+ ],
+ "body": [
+ 124,
+ 200
+ ]
+ }
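Each entry pairs two integer IDs per class; judging by the file name, the first is the index in the 124-class label space and the second the corresponding index in the 132-class space used elsewhere in this bundle (note the non-contiguous targets, e.g. `"body"` maps to 200). A hedged sketch of applying such a mapping to an integer segmentation mask via a lookup table:

```python
import json

import numpy as np

with open("configs/label_dict_124_to_132.json") as f:
    remap = json.load(f)


def remap_labels(mask: np.ndarray) -> np.ndarray:
    """Rewrite a mask from the 124-class IDs to the 132-class IDs.
    Assumes every voxel value in `mask` appears as a [src, dst] pair above."""
    lut = np.zeros(max(src for src, _ in remap.values()) + 1, dtype=np.int64)
    for src, dst in remap.values():
        lut[src] = dst
    return lut[mask]  # mask must be an integer array
```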
configs/logging.conf ADDED
@@ -0,0 +1,21 @@
+ [loggers]
+ keys=root
+
+ [handlers]
+ keys=consoleHandler
+
+ [formatters]
+ keys=fullFormatter
+
+ [logger_root]
+ level=INFO
+ handlers=consoleHandler
+
+ [handler_consoleHandler]
+ class=StreamHandler
+ level=INFO
+ formatter=fullFormatter
+ args=(sys.stdout,)
+
+ [formatter_fullFormatter]
+ format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
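This is a standard `logging.config` INI file; the bundle runner applies it when a `logging_file` argument is supplied (as in the sketches above). Outside the runner it can also be loaded directly with the standard library:

```python
import logging
import logging.config

# Installs the consoleHandler/fullFormatter configuration defined above
# on the root logger (INFO level, stdout).
logging.config.fileConfig("configs/logging.conf")
logging.getLogger(__name__).info("logging configured")
```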
configs/metadata.json ADDED
@@ -0,0 +1,269 @@
+ {
+ "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_generator_ldm_20240318.json",
+ "version": "1.0.0",
+ "changelog": {
+ "1.0.0": "accelerated maisi, inference only, is not compartible with previous maisi diffusion model weights",
+ "0.4.6": "add TensorRT support",
+ "0.4.5": "update README",
+ "0.4.4": "update issue for IgniteInfo",
+ "0.4.3": "remove download large files, add weights_only when loading weights and add label_dict to large files",
+ "0.4.2": "update train.json to fix finetune ckpt bug",
+ "0.4.1": "update large files",
+ "0.4.0": "update to use monai 1.4, model ckpt updated, rm GenerativeAI repo, add quality check",
+ "0.3.6": "first oss version"
+ },
+ "monai_version": "1.4.0",
+ "pytorch_version": "2.4.0",
+ "numpy_version": "1.24.4",
+ "optional_packages_version": {
+ "fire": "0.6.0",
+ "nibabel": "5.2.1",
+ "tqdm": "4.66.4"
+ },
+ "supported_apps": {
+ "maisi-nim": ""
+ },
+ "name": "CT image latent diffusion generation",
+ "task": "CT image synthesis",
+ "description": "A generative model for creating 3D CT from Gaussian noise",
+ "authors": "MONAI team",
+ "copyright": "Copyright (c) MONAI Consortium",
+ "data_source": "http://medicaldecathlon.com/",
+ "data_type": "nibabel",
+ "image_classes": "Flair brain MRI with 1.1x1.1x1.1 mm voxel size",
+ "eval_metrics": {},
+ "intended_use": "This is a research tool/prototype and not to be used clinically",
+ "references": [],
+ "autoencoder_data_format": {
+ "inputs": {
+ "image": {
+ "type": "feature",
+ "format": "image",
+ "num_channels": 4,
+ "spatial_shape": [
+ 128,
+ 128,
+ 128
+ ],
+ "dtype": "float16",
+ "value_range": [
+ 0,
+ 1
+ ],
+ "is_patch_data": true
+ },
+ "body_region": {
+ "type": "array",
+ "value_range": [
+ "head",
+ "abdomen",
+ "chest/thorax",
+ "pelvis/lower"
+ ]
+ },
+ "anatomy_list": {
+ "type": "array",
+ "value_range": [
+ "liver",
+ "spleen",
+ "pancreas",
+ "right kidney",
+ "aorta",
+ "inferior vena cava",
+ "right adrenal gland",
+ "left adrenal gland",
+ "gallbladder",
+ "esophagus",
+ "stomach",
+ "duodenum",
+ "left kidney",
+ "bladder",
+ "portal vein and splenic vein",
+ "small bowel",
+ "brain",
+ "lung tumor",
+ "pancreatic tumor",
+ "hepatic vessel",
+ "hepatic tumor",
+ "colon cancer primaries",
+ "left lung upper lobe",
+ "left lung lower lobe",
+ "right lung upper lobe",
+ "right lung middle lobe",
+ "right lung lower lobe",
+ "vertebrae L5",
+ "vertebrae L4",
+ "vertebrae L3",
+ "vertebrae L2",
+ "vertebrae L1",
+ "vertebrae T12",
+ "vertebrae T11",
+ "vertebrae T10",
+ "vertebrae T9",
+ "vertebrae T8",
+ "vertebrae T7",
+ "vertebrae T6",
+ "vertebrae T5",
+ "vertebrae T4",
+ "vertebrae T3",
+ "vertebrae T2",
+ "vertebrae T1",
+ "vertebrae C7",
+ "vertebrae C6",
+ "vertebrae C5",
+ "vertebrae C4",
+ "vertebrae C3",
+ "vertebrae C2",
+ "vertebrae C1",
+ "trachea",
+ "left iliac artery",
+ "right iliac artery",
+ "left iliac vena",
+ "right iliac vena",
+ "colon",
+ "left rib 1",
+ "left rib 2",
+ "left rib 3",
+ "left rib 4",
+ "left rib 5",
+ "left rib 6",
+ "left rib 7",
+ "left rib 8",
+ "left rib 9",
+ "left rib 10",
+ "left rib 11",
+ "left rib 12",
+ "right rib 1",
+ "right rib 2",
+ "right rib 3",
+ "right rib 4",
+ "right rib 5",
+ "right rib 6",
+ "right rib 7",
+ "right rib 8",
+ "right rib 9",
+ "right rib 10",
+ "right rib 11",
+ "right rib 12",
+ "left humerus",
+ "right humerus",
+ "left scapula",
+ "right scapula",
+ "left clavicula",
+ "right clavicula",
+ "left femur",
+ "right femur",
+ "left hip",
+ "right hip",
+ "sacrum",
+ "left gluteus maximus",
+ "right gluteus maximus",
+ "left gluteus medius",
+ "right gluteus medius",
+ "left gluteus minimus",
+ "right gluteus minimus",
+ "left autochthon",
+ "right autochthon",
+ "left iliopsoas",
+ "right iliopsoas",
+ "left atrial appendage",
+ "brachiocephalic trunk",
+ "left brachiocephalic vein",
+ "right brachiocephalic vein",
+ "left common carotid artery",
+ "right common carotid artery",
+ "costal cartilages",
+ "heart",
+ "left kidney cyst",
+ "right kidney cyst",
+ "prostate",
+ "pulmonary vein",
+ "skull",
+ "spinal cord",
+ "sternum",
+ "left subclavian artery",
+ "right subclavian artery",
+ "superior vena cava",
+ "thyroid gland",
+ "vertebrae S1",
+ "bone lesion",
+ "airway"
+ ]
+ }
+ },
+ "outputs": {
+ "pred": {
+ "type": "image",
+ "format": "image",
+ "num_channels": 1,
+ "spatial_shape": [
+ 512,
+ 512,
+ 512
+ ],
+ "dtype": "float16",
+ "value_range": [
+ 0,
+ 1
+ ],
+ "is_patch_data": true,
+ "channel_def": {
+ "0": "image"
+ }
+ }
+ }
+ },
+ "generator_data_format": {
+ "inputs": {
+ "latent": {
+ "type": "noise",
+ "format": "image",
+ "num_channels": 4,
+ "spatial_shape": [
+ 128,
+ 128,
+ 128
+ ],
+ "dtype": "float16",
+ "value_range": [
+ 0,
+ 1
+ ],
+ "is_patch_data": true
+ },
+ "condition": {
+ "type": "timesteps",
+ "format": "timesteps",
+ "num_channels": 1,
+ "spatial_shape": [],
+ "dtype": "long",
+ "value_range": [
+ 0,
+ 1000
+ ],
+ "is_patch_data": false
+ }
+ },
+ "outputs": {
+ "pred": {
+ "type": "feature",
+ "format": "image",
+ "num_channels": 4,
+ "spatial_shape": [
+ 128,
+ 128,
+ 128
+ ],
+ "dtype": "float16",
+ "value_range": [
+ 0,
+ 1
+ ],
+ "is_patch_data": true,
+ "channel_def": {
+ "0": "image"
+ }
+ }
+ }
+ }
+ }
configs/multi_gpu_train.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "device": "$torch.device('cuda:' + os.environ['LOCAL_RANK'])",
+ "use_tensorboard": "$dist.get_rank() == 0",
+ "controlnet": {
+ "_target_": "torch.nn.parallel.DistributedDataParallel",
+ "module": "$@controlnet_def.to(@device)",
+ "find_unused_parameters": true,
+ "device_ids": [
+ "@device"
+ ]
+ },
+ "load_controlnet": "[email protected]_state_dict(@checkpoint_controlnet['controlnet_state_dict'], strict=True)",
+ "train#sampler": {
+ "_target_": "DistributedSampler",
+ "dataset": "@train#dataset",
+ "even_divisible": true,
+ "shuffle": true
+ },
+ "train#dataloader#sampler": "@train#sampler",
+ "train#dataloader#shuffle": false,
+ "train#trainer#train_handlers": "$@train#handlers[: -1 if dist.get_rank() > 0 else None]",
+ "initialize": [
+ "$import torch.distributed as dist",
+ "$dist.is_initialized() or dist.init_process_group(backend='nccl')",
+ "$torch.cuda.set_device(@device)",
+ "$monai.utils.set_determinism(seed=123)"
+ ],
+ "run": [
+ "$@train#trainer.run()"
+ ],
+ "finalize": [
+ "$dist.is_initialized() and dist.destroy_process_group()"
+ ]
+ }
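This override file expects one process per GPU (it reads `LOCAL_RANK` and initializes the NCCL process group in `initialize`), so it is layered over `train.json` below and started with a distributed launcher. A minimal sketch, assuming a single node and a hypothetical `train_dist.py` wrapper launched via `torchrun --nproc_per_node=<num_gpus> train_dist.py`:

```python
# train_dist.py -- hypothetical launcher; torchrun sets LOCAL_RANK per process.
from monai.bundle import run

# multi_gpu_train.json is listed last so its DDP-wrapped controlnet, per-rank
# device, and DistributedSampler override the single-GPU entries in train.json.
run(
    config_file=["configs/train.json", "configs/multi_gpu_train.json"],
    logging_file="configs/logging.conf",
    meta_file="configs/metadata.json",
)
```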
configs/train.json ADDED
@@ -0,0 +1,271 @@
+ {
+ "imports": [
+ "$import glob",
+ "$import os",
+ "$import scripts",
+ "$import ignite"
+ ],
+ "bundle_root": ".",
+ "ckpt_dir": "$@bundle_root + '/models'",
+ "output_dir": "$@bundle_root + '/output'",
+ "data_list_file_path": "$@bundle_root + '/datasets/C4KC-KiTS_subset.json'",
+ "dataset_dir": "$@bundle_root + '/datasets/C4KC-KiTS_subset'",
+ "trained_diffusion_path": "$@ckpt_dir + '/input_unet3d_data-all_steps1000size512ddpm_random_current_inputx_v1.pt'",
+ "trained_controlnet_path": "$@ckpt_dir + '/controlnet-20datasets-e20wl100fold0bc_noi_dia_fsize_current.pt'",
+ "use_tensorboard": true,
+ "fold": 0,
+ "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
+ "epochs": 100,
+ "batch_size": 1,
+ "val_at_start": false,
+ "learning_rate": 0.0001,
+ "weighted_loss_label": [
+ 129
+ ],
+ "weighted_loss": 100,
+ "amp": true,
+ "train_datalist": "$scripts.utils.maisi_datafold_read(json_list=@data_list_file_path, data_base_dir=@dataset_dir, fold=@fold)[0]",
+ "spatial_dims": 3,
+ "image_channels": 1,
+ "latent_channels": 4,
+ "diffusion_unet_def": {
+ "_target_": "monai.apps.generation.maisi.networks.diffusion_model_unet_maisi.DiffusionModelUNetMaisi",
+ "spatial_dims": "@spatial_dims",
+ "in_channels": "@latent_channels",
+ "out_channels": "@latent_channels",
+ "num_channels": [
+ 64,
+ 128,
+ 256,
+ 512
+ ],
+ "attention_levels": [
+ false,
+ false,
+ true,
+ true
+ ],
+ "num_head_channels": [
+ 0,
+ 0,
+ 32,
+ 32
+ ],
+ "num_res_blocks": 2,
+ "use_flash_attention": true,
+ "include_top_region_index_input": true,
+ "include_bottom_region_index_input": true,
+ "include_spacing_input": true
+ },
+ "controlnet_def": {
+ "_target_": "monai.apps.generation.maisi.networks.controlnet_maisi.ControlNetMaisi",
+ "spatial_dims": "@spatial_dims",
+ "in_channels": "@latent_channels",
+ "num_channels": [
+ 64,
+ 128,
+ 256,
+ 512
+ ],
+ "attention_levels": [
+ false,
+ false,
+ true,
+ true
+ ],
+ "num_head_channels": [
+ 0,
+ 0,
+ 32,
+ 32
+ ],
+ "num_res_blocks": 2,
+ "use_flash_attention": true,
+ "conditioning_embedding_in_channels": 8,
+ "conditioning_embedding_num_channels": [
+ 8,
+ 32,
+ 64
+ ]
+ },
+ "noise_scheduler": {
+ "_target_": "monai.networks.schedulers.ddpm.DDPMScheduler",
+ "num_train_timesteps": 1000,
+ "beta_start": 0.0015,
+ "beta_end": 0.0195,
+ "schedule": "scaled_linear_beta",
+ "clip_sample": false
+ },
+ "unzip_dataset": "$scripts.utils.unzip_dataset(@dataset_dir)",
+ "diffusion_unet": "$@diffusion_unet_def.to(@device)",
+ "checkpoint_diffusion_unet": "$torch.load(@trained_diffusion_path, weights_only=False)",
+ "load_diffusion": "$@diffusion_unet.load_state_dict(@checkpoint_diffusion_unet['unet_state_dict'])",
+ "controlnet": "$@controlnet_def.to(@device)",
+ "copy_controlnet_state": "$monai.networks.utils.copy_model_state(@controlnet, @diffusion_unet.state_dict())",
+ "checkpoint_controlnet": "$torch.load(@trained_controlnet_path, weights_only=False)",
+ "load_controlnet": "[email protected]_state_dict(@checkpoint_controlnet['controlnet_state_dict'], strict=True)",
+ "scale_factor": "$@checkpoint_diffusion_unet['scale_factor'].to(@device)",
+ "loss": {
+ "_target_": "torch.nn.L1Loss",
+ "reduction": "none"
+ },
+ "optimizer": {
+ "_target_": "torch.optim.AdamW",
+ "params": "[email protected]()",
+ "lr": "@learning_rate",
+ "weight_decay": 1e-05
+ },
+ "lr_schedule": {
+ "activate": true,
+ "lr_scheduler": {
+ "_target_": "torch.optim.lr_scheduler.PolynomialLR",
+ "optimizer": "@optimizer",
+ "total_iters": "$(@epochs * len(@train#dataloader.dataset)) / @batch_size",
+ "power": 2.0
+ }
+ },
+ "train": {
+ "deterministic_transforms": [
+ {
+ "_target_": "LoadImaged",
+ "keys": [
+ "image",
+ "label"
+ ],
+ "image_only": true,
+ "ensure_channel_first": true
+ },
+ {
+ "_target_": "Orientationd",
+ "keys": [
+ "label"
+ ],
+ "axcodes": "RAS"
+ },
+ {
+ "_target_": "EnsureTyped",
+ "keys": [
+ "label"
+ ],
+ "dtype": "$torch.uint8",
+ "track_meta": true
+ },
+ {
+ "_target_": "Lambdad",
+ "keys": "top_region_index",
+ "func": "$lambda x: torch.FloatTensor(x)"
+ },
+ {
+ "_target_": "Lambdad",
+ "keys": "bottom_region_index",
+ "func": "$lambda x: torch.FloatTensor(x)"
+ },
+ {
+ "_target_": "Lambdad",
+ "keys": "spacing",
+ "func": "$lambda x: torch.FloatTensor(x)"
+ },
+ {
+ "_target_": "Lambdad",
+ "keys": "top_region_index",
+ "func": "$lambda x: x * 1e2"
+ },
+ {
+ "_target_": "Lambdad",
+ "keys": "bottom_region_index",
+ "func": "$lambda x: x * 1e2"
+ },
+ {
+ "_target_": "Lambdad",
+ "keys": "spacing",
+ "func": "$lambda x: x * 1e2"
+ }
+ ],
+ "inferer": {
+ "_target_": "SimpleInferer"
+ },
+ "preprocessing": {
+ "_target_": "Compose",
+ "transforms": "$@train#deterministic_transforms"
+ },
+ "dataset": {
+ "_target_": "Dataset",
+ "data": "@train_datalist",
+ "transform": "@train#preprocessing"
+ },
+ "dataloader": {
+ "_target_": "DataLoader",
+ "dataset": "@train#dataset",
+ "batch_size": "@batch_size",
+ "shuffle": true,
+ "num_workers": 4,
+ "pin_memory": true,
+ "persistent_workers": true
+ },
+ "handlers": [
+ {
+ "_target_": "LrScheduleHandler",
+ "_disabled_": "$not @lr_schedule#activate",
+ "lr_scheduler": "@lr_schedule#lr_scheduler",
+ "epoch_level": false,
+ "print_lr": true
+ },
+ {
+ "_target_": "CheckpointSaver",
+ "save_dir": "@ckpt_dir",
+ "save_dict": {
+ "controlnet_state_dict": "@controlnet",
+ "optimizer": "@optimizer"
+ },
+ "save_interval": 1,
+ "n_saved": 5
+ },
+ {
+ "_target_": "TensorBoardStatsHandler",
+ "_disabled_": "$not @use_tensorboard",
+ "log_dir": "@output_dir",
+ "tag_name": "train_loss",
+ "output_transform": "$monai.handlers.from_engine(['loss'], first=True)"
+ },
+ {
+ "_target_": "StatsHandler",
+ "tag_name": "train_loss",
+ "name": "StatsHandler",
+ "output_transform": "$monai.handlers.from_engine(['loss'], first=True)"
+ }
+ ],
+ "trainer": {
+ "_target_": "scripts.trainer.MAISIControlNetTrainer",
+ "_requires_": [
+ "@load_diffusion",
+ "@copy_controlnet_state",
+ "@load_controlnet",
+ "@unzip_dataset"
+ ],
+ "max_epochs": "@epochs",
+ "device": "@device",
+ "train_data_loader": "@train#dataloader",
+ "diffusion_unet": "@diffusion_unet",
+ "controlnet": "@controlnet",
+ "noise_scheduler": "@noise_scheduler",
+ "loss_function": "@loss",
+ "optimizer": "@optimizer",
+ "inferer": "@train#inferer",
+ "key_train_metric": null,
+ "train_handlers": "@train#handlers",
+ "amp": "@amp",
+ "hyper_kwargs": {
+ "weighted_loss": "@weighted_loss",
+ "weighted_loss_label": "@weighted_loss_label",
260
+ "scale_factor": "@scale_factor"
261
+ }
262
+ }
263
+ },
264
+ "initialize": [
265
+ "$monai.utils.set_determinism(seed=0)"
266
+ ],
267
+ "run": [
268
+ "$@train#trainer.add_event_handler(ignite.engine.Events.ITERATION_COMPLETED, ignite.handlers.TerminateOnNan())",
269
+ "$@train#trainer.run()"
270
+ ]
271
+ }
datasets/C4KC-KiTS_subset.json ADDED
@@ -0,0 +1,814 @@
1
+ {
2
+ "training": [
3
+ {
4
+ "image": "KiTS-00186/2_arterial_emb_zs99.nii.gz",
5
+ "label": "KiTS-00186/mask_combined_label_zs99_wbdm.nii.gz",
6
+ "fold": 0,
7
+ "dim": [
8
+ 512,
9
+ 512,
10
+ 512
11
+ ],
12
+ "spacing": [
13
+ 1.0,
14
+ 1.0,
15
+ 1.0
16
+ ],
17
+ "top_region_index": [
18
+ 0,
19
+ 1,
20
+ 0,
21
+ 0
22
+ ],
23
+ "bottom_region_index": [
24
+ 0,
25
+ 0,
26
+ 0,
27
+ 1
28
+ ]
29
+ },
30
+ {
31
+ "image": "KiTS-00066/3_arterial_emb_zs11.nii.gz",
32
+ "label": "KiTS-00066/mask_combined_label_zs11_wbdm.nii.gz",
33
+ "fold": 0,
34
+ "dim": [
35
+ 512,
36
+ 512,
37
+ 512
38
+ ],
39
+ "spacing": [
40
+ 1.0,
41
+ 1.0,
42
+ 1.0
43
+ ],
44
+ "top_region_index": [
45
+ 0,
46
+ 1,
47
+ 0,
48
+ 0
49
+ ],
50
+ "bottom_region_index": [
51
+ 0,
52
+ 0,
53
+ 0,
54
+ 1
55
+ ]
56
+ },
57
+ {
58
+ "image": "KiTS-00012/2_arterial_emb_zs0.nii.gz",
59
+ "label": "KiTS-00012/mask_combined_label_zs0_wbdm.nii.gz",
60
+ "fold": 0,
61
+ "dim": [
62
+ 512,
63
+ 512,
64
+ 512
65
+ ],
66
+ "spacing": [
67
+ 1.0,
68
+ 1.0,
69
+ 1.0
70
+ ],
71
+ "top_region_index": [
72
+ 0,
73
+ 1,
74
+ 0,
75
+ 0
76
+ ],
77
+ "bottom_region_index": [
78
+ 0,
79
+ 0,
80
+ 0,
81
+ 1
82
+ ]
83
+ },
84
+ {
85
+ "image": "KiTS-00055/2_arterial_emb_zs0.nii.gz",
86
+ "label": "KiTS-00055/mask_combined_label_zs0_wbdm.nii.gz",
87
+ "fold": 0,
88
+ "dim": [
89
+ 512,
90
+ 512,
91
+ 512
92
+ ],
93
+ "spacing": [
94
+ 1.0,
95
+ 1.0,
96
+ 1.0
97
+ ],
98
+ "top_region_index": [
99
+ 0,
100
+ 1,
101
+ 0,
102
+ 0
103
+ ],
104
+ "bottom_region_index": [
105
+ 0,
106
+ 0,
107
+ 0,
108
+ 1
109
+ ]
110
+ },
111
+ {
112
+ "image": "KiTS-00193/100_arterial_emb_zs0.nii.gz",
113
+ "label": "KiTS-00193/mask_combined_label_zs0_wbdm.nii.gz",
114
+ "fold": 0,
115
+ "dim": [
116
+ 512,
117
+ 512,
118
+ 512
119
+ ],
120
+ "spacing": [
121
+ 1.0,
122
+ 1.0,
123
+ 1.0
124
+ ],
125
+ "top_region_index": [
126
+ 0,
127
+ 1,
128
+ 0,
129
+ 0
130
+ ],
131
+ "bottom_region_index": [
132
+ 0,
133
+ 0,
134
+ 1,
135
+ 0
136
+ ]
137
+ },
138
+ {
139
+ "image": "KiTS-00142/7_arterial_emb_zs0.nii.gz",
140
+ "label": "KiTS-00142/mask_combined_label_zs0_wbdm.nii.gz",
141
+ "fold": 0,
142
+ "dim": [
143
+ 512,
144
+ 512,
145
+ 512
146
+ ],
147
+ "spacing": [
148
+ 1.0,
149
+ 1.0,
150
+ 1.0
151
+ ],
152
+ "top_region_index": [
153
+ 0,
154
+ 1,
155
+ 0,
156
+ 0
157
+ ],
158
+ "bottom_region_index": [
159
+ 0,
160
+ 0,
161
+ 1,
162
+ 0
163
+ ]
164
+ },
165
+ {
166
+ "image": "KiTS-00069/3_arterial_emb_zs0.nii.gz",
167
+ "label": "KiTS-00069/mask_combined_label_zs0_wbdm.nii.gz",
168
+ "fold": 0,
169
+ "dim": [
170
+ 512,
171
+ 512,
172
+ 512
173
+ ],
174
+ "spacing": [
175
+ 1.0,
176
+ 1.0,
177
+ 1.0
178
+ ],
179
+ "top_region_index": [
180
+ 0,
181
+ 1,
182
+ 0,
183
+ 0
184
+ ],
185
+ "bottom_region_index": [
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 1
190
+ ]
191
+ },
192
+ {
193
+ "image": "KiTS-00124/2_arterial_emb_zs0.nii.gz",
194
+ "label": "KiTS-00124/mask_combined_label_zs0_wbdm.nii.gz",
195
+ "fold": 0,
196
+ "dim": [
197
+ 512,
198
+ 512,
199
+ 512
200
+ ],
201
+ "spacing": [
202
+ 1.0,
203
+ 1.0,
204
+ 1.0
205
+ ],
206
+ "top_region_index": [
207
+ 0,
208
+ 1,
209
+ 0,
210
+ 0
211
+ ],
212
+ "bottom_region_index": [
213
+ 0,
214
+ 0,
215
+ 0,
216
+ 1
217
+ ]
218
+ },
219
+ {
220
+ "image": "KiTS-00208/2_arterial_emb_zs0.nii.gz",
221
+ "label": "KiTS-00208/mask_combined_label_zs0_wbdm.nii.gz",
222
+ "fold": 0,
223
+ "dim": [
224
+ 512,
225
+ 512,
226
+ 512
227
+ ],
228
+ "spacing": [
229
+ 1.0,
230
+ 1.0,
231
+ 1.0
232
+ ],
233
+ "top_region_index": [
234
+ 0,
235
+ 1,
236
+ 0,
237
+ 0
238
+ ],
239
+ "bottom_region_index": [
240
+ 0,
241
+ 0,
242
+ 0,
243
+ 1
244
+ ]
245
+ },
246
+ {
247
+ "image": "KiTS-00116/9_arterial_emb_zs0.nii.gz",
248
+ "label": "KiTS-00116/mask_combined_label_zs0_wbdm.nii.gz",
249
+ "fold": 0,
250
+ "dim": [
251
+ 512,
252
+ 512,
253
+ 512
254
+ ],
255
+ "spacing": [
256
+ 1.0,
257
+ 1.0,
258
+ 1.0
259
+ ],
260
+ "top_region_index": [
261
+ 0,
262
+ 1,
263
+ 0,
264
+ 0
265
+ ],
266
+ "bottom_region_index": [
267
+ 0,
268
+ 0,
269
+ 1,
270
+ 0
271
+ ]
272
+ },
273
+ {
274
+ "image": "KiTS-00061/4_arterial_emb_zs0.nii.gz",
275
+ "label": "KiTS-00061/mask_combined_label_zs0_wbdm.nii.gz",
276
+ "fold": 1,
277
+ "dim": [
278
+ 512,
279
+ 512,
280
+ 512
281
+ ],
282
+ "spacing": [
283
+ 1.0,
284
+ 1.0,
285
+ 1.0
286
+ ],
287
+ "top_region_index": [
288
+ 0,
289
+ 0,
290
+ 1,
291
+ 0
292
+ ],
293
+ "bottom_region_index": [
294
+ 0,
295
+ 0,
296
+ 1,
297
+ 0
298
+ ]
299
+ },
300
+ {
301
+ "image": "KiTS-00040/3_arterial_emb_zs0.nii.gz",
302
+ "label": "KiTS-00040/mask_combined_label_zs0_wbdm.nii.gz",
303
+ "fold": 1,
304
+ "dim": [
305
+ 512,
306
+ 512,
307
+ 512
308
+ ],
309
+ "spacing": [
310
+ 1.0,
311
+ 1.0,
312
+ 1.0
313
+ ],
314
+ "top_region_index": [
315
+ 0,
316
+ 1,
317
+ 0,
318
+ 0
319
+ ],
320
+ "bottom_region_index": [
321
+ 0,
322
+ 0,
323
+ 0,
324
+ 1
325
+ ]
326
+ },
327
+ {
328
+ "image": "KiTS-00068/7_arterial_emb_zs0.nii.gz",
329
+ "label": "KiTS-00068/mask_combined_label_zs0_wbdm.nii.gz",
330
+ "fold": 1,
331
+ "dim": [
332
+ 512,
333
+ 512,
334
+ 512
335
+ ],
336
+ "spacing": [
337
+ 1.0,
338
+ 1.0,
339
+ 1.0
340
+ ],
341
+ "top_region_index": [
342
+ 0,
343
+ 1,
344
+ 0,
345
+ 0
346
+ ],
347
+ "bottom_region_index": [
348
+ 0,
349
+ 0,
350
+ 1,
351
+ 0
352
+ ]
353
+ },
354
+ {
355
+ "image": "KiTS-00036/2_arterial_emb_zs0.nii.gz",
356
+ "label": "KiTS-00036/mask_combined_label_zs0_wbdm.nii.gz",
357
+ "fold": 1,
358
+ "dim": [
359
+ 512,
360
+ 512,
361
+ 512
362
+ ],
363
+ "spacing": [
364
+ 1.0,
365
+ 1.0,
366
+ 1.0
367
+ ],
368
+ "top_region_index": [
369
+ 0,
370
+ 1,
371
+ 0,
372
+ 0
373
+ ],
374
+ "bottom_region_index": [
375
+ 0,
376
+ 0,
377
+ 0,
378
+ 1
379
+ ]
380
+ },
381
+ {
382
+ "image": "KiTS-00153/8_arterial_emb_zs0.nii.gz",
383
+ "label": "KiTS-00153/mask_combined_label_zs0_wbdm.nii.gz",
384
+ "fold": 1,
385
+ "dim": [
386
+ 512,
387
+ 512,
388
+ 512
389
+ ],
390
+ "spacing": [
391
+ 1.0,
392
+ 1.0,
393
+ 1.0
394
+ ],
395
+ "top_region_index": [
396
+ 0,
397
+ 1,
398
+ 0,
399
+ 0
400
+ ],
401
+ "bottom_region_index": [
402
+ 0,
403
+ 0,
404
+ 1,
405
+ 0
406
+ ]
407
+ },
408
+ {
409
+ "image": "KiTS-00189/2_arterial_emb_zs107.nii.gz",
410
+ "label": "KiTS-00189/mask_combined_label_zs107_wbdm.nii.gz",
411
+ "fold": 1,
412
+ "dim": [
413
+ 512,
414
+ 512,
415
+ 512
416
+ ],
417
+ "spacing": [
418
+ 1.0,
419
+ 1.0,
420
+ 1.0
421
+ ],
422
+ "top_region_index": [
423
+ 0,
424
+ 1,
425
+ 0,
426
+ 0
427
+ ],
428
+ "bottom_region_index": [
429
+ 0,
430
+ 0,
431
+ 0,
432
+ 1
433
+ ]
434
+ },
435
+ {
436
+ "image": "KiTS-00091/7_arterial_emb_zs0.nii.gz",
437
+ "label": "KiTS-00091/mask_combined_label_zs0_wbdm.nii.gz",
438
+ "fold": 1,
439
+ "dim": [
440
+ 512,
441
+ 512,
442
+ 512
443
+ ],
444
+ "spacing": [
445
+ 1.0,
446
+ 1.0,
447
+ 1.0
448
+ ],
449
+ "top_region_index": [
450
+ 0,
451
+ 1,
452
+ 0,
453
+ 0
454
+ ],
455
+ "bottom_region_index": [
456
+ 0,
457
+ 0,
458
+ 1,
459
+ 0
460
+ ]
461
+ },
462
+ {
463
+ "image": "KiTS-00110/3_arterial_emb_zs0.nii.gz",
464
+ "label": "KiTS-00110/mask_combined_label_zs0_wbdm.nii.gz",
465
+ "fold": 1,
466
+ "dim": [
467
+ 512,
468
+ 512,
469
+ 512
470
+ ],
471
+ "spacing": [
472
+ 1.0,
473
+ 1.0,
474
+ 1.0
475
+ ],
476
+ "top_region_index": [
477
+ 0,
478
+ 1,
479
+ 0,
480
+ 0
481
+ ],
482
+ "bottom_region_index": [
483
+ 0,
484
+ 0,
485
+ 1,
486
+ 0
487
+ ]
488
+ },
489
+ {
490
+ "image": "KiTS-00046/2_arterial_emb_zs0.nii.gz",
491
+ "label": "KiTS-00046/mask_combined_label_zs0_wbdm.nii.gz",
492
+ "fold": 1,
493
+ "dim": [
494
+ 512,
495
+ 512,
496
+ 512
497
+ ],
498
+ "spacing": [
499
+ 1.0,
500
+ 1.0,
501
+ 1.0
502
+ ],
503
+ "top_region_index": [
504
+ 0,
505
+ 1,
506
+ 0,
507
+ 0
508
+ ],
509
+ "bottom_region_index": [
510
+ 0,
511
+ 0,
512
+ 0,
513
+ 1
514
+ ]
515
+ },
516
+ {
517
+ "image": "KiTS-00178/3_arterial_emb_zs0.nii.gz",
518
+ "label": "KiTS-00178/mask_combined_label_zs0_wbdm.nii.gz",
519
+ "fold": 1,
520
+ "dim": [
521
+ 512,
522
+ 512,
523
+ 512
524
+ ],
525
+ "spacing": [
526
+ 1.0,
527
+ 1.0,
528
+ 1.0
529
+ ],
530
+ "top_region_index": [
531
+ 0,
532
+ 1,
533
+ 0,
534
+ 0
535
+ ],
536
+ "bottom_region_index": [
537
+ 0,
538
+ 0,
539
+ 0,
540
+ 1
541
+ ]
542
+ },
543
+ {
544
+ "image": "KiTS-00075/2_arterial_emb_zs0.nii.gz",
545
+ "label": "KiTS-00075/mask_combined_label_zs0_wbdm.nii.gz",
546
+ "fold": 1,
547
+ "dim": [
548
+ 512,
549
+ 512,
550
+ 512
551
+ ],
552
+ "spacing": [
553
+ 1.0,
554
+ 1.0,
555
+ 1.0
556
+ ],
557
+ "top_region_index": [
558
+ 0,
559
+ 1,
560
+ 0,
561
+ 0
562
+ ],
563
+ "bottom_region_index": [
564
+ 0,
565
+ 0,
566
+ 0,
567
+ 1
568
+ ]
569
+ },
570
+ {
571
+ "image": "KiTS-00037/6_arterial_emb_zs0.nii.gz",
572
+ "label": "KiTS-00037/mask_combined_label_zs0_wbdm.nii.gz",
573
+ "fold": 1,
574
+ "dim": [
575
+ 512,
576
+ 512,
577
+ 512
578
+ ],
579
+ "spacing": [
580
+ 1.0,
581
+ 1.0,
582
+ 1.0
583
+ ],
584
+ "top_region_index": [
585
+ 0,
586
+ 1,
587
+ 0,
588
+ 0
589
+ ],
590
+ "bottom_region_index": [
591
+ 0,
592
+ 0,
593
+ 0,
594
+ 1
595
+ ]
596
+ },
597
+ {
598
+ "image": "KiTS-00130/9_arterial_emb_zs0.nii.gz",
599
+ "label": "KiTS-00130/mask_combined_label_zs0_wbdm.nii.gz",
600
+ "fold": 1,
601
+ "dim": [
602
+ 512,
603
+ 512,
604
+ 512
605
+ ],
606
+ "spacing": [
607
+ 1.0,
608
+ 1.0,
609
+ 1.0
610
+ ],
611
+ "top_region_index": [
612
+ 0,
613
+ 1,
614
+ 0,
615
+ 0
616
+ ],
617
+ "bottom_region_index": [
618
+ 0,
619
+ 0,
620
+ 1,
621
+ 0
622
+ ]
623
+ },
624
+ {
625
+ "image": "KiTS-00063/6_arterial_emb_zs0.nii.gz",
626
+ "label": "KiTS-00063/mask_combined_label_zs0_wbdm.nii.gz",
627
+ "fold": 1,
628
+ "dim": [
629
+ 512,
630
+ 512,
631
+ 512
632
+ ],
633
+ "spacing": [
634
+ 1.0,
635
+ 1.0,
636
+ 1.0
637
+ ],
638
+ "top_region_index": [
639
+ 0,
640
+ 1,
641
+ 0,
642
+ 0
643
+ ],
644
+ "bottom_region_index": [
645
+ 0,
646
+ 0,
647
+ 1,
648
+ 0
649
+ ]
650
+ },
651
+ {
652
+ "image": "KiTS-00205/4_arterial_emb_zs0.nii.gz",
653
+ "label": "KiTS-00205/mask_combined_label_zs0_wbdm.nii.gz",
654
+ "fold": 1,
655
+ "dim": [
656
+ 512,
657
+ 512,
658
+ 512
659
+ ],
660
+ "spacing": [
661
+ 1.0,
662
+ 1.0,
663
+ 1.0
664
+ ],
665
+ "top_region_index": [
666
+ 0,
667
+ 1,
668
+ 0,
669
+ 0
670
+ ],
671
+ "bottom_region_index": [
672
+ 0,
673
+ 0,
674
+ 0,
675
+ 1
676
+ ]
677
+ },
678
+ {
679
+ "image": "KiTS-00167/2_arterial_emb_zs0.nii.gz",
680
+ "label": "KiTS-00167/mask_combined_label_zs0_wbdm.nii.gz",
681
+ "fold": 1,
682
+ "dim": [
683
+ 512,
684
+ 512,
685
+ 512
686
+ ],
687
+ "spacing": [
688
+ 1.0,
689
+ 1.0,
690
+ 1.0
691
+ ],
692
+ "top_region_index": [
693
+ 0,
694
+ 1,
695
+ 0,
696
+ 0
697
+ ],
698
+ "bottom_region_index": [
699
+ 0,
700
+ 0,
701
+ 0,
702
+ 1
703
+ ]
704
+ },
705
+ {
706
+ "image": "KiTS-00059/8_arterial_emb_zs0.nii.gz",
707
+ "label": "KiTS-00059/mask_combined_label_zs0_wbdm.nii.gz",
708
+ "fold": 1,
709
+ "dim": [
710
+ 512,
711
+ 512,
712
+ 512
713
+ ],
714
+ "spacing": [
715
+ 1.0,
716
+ 1.0,
717
+ 1.0
718
+ ],
719
+ "top_region_index": [
720
+ 0,
721
+ 1,
722
+ 0,
723
+ 0
724
+ ],
725
+ "bottom_region_index": [
726
+ 0,
727
+ 0,
728
+ 1,
729
+ 0
730
+ ]
731
+ },
732
+ {
733
+ "image": "KiTS-00172/3_arterial_emb_zs0.nii.gz",
734
+ "label": "KiTS-00172/mask_combined_label_zs0_wbdm.nii.gz",
735
+ "fold": 1,
736
+ "dim": [
737
+ 512,
738
+ 512,
739
+ 512
740
+ ],
741
+ "spacing": [
742
+ 1.0,
743
+ 1.0,
744
+ 1.0
745
+ ],
746
+ "top_region_index": [
747
+ 0,
748
+ 1,
749
+ 0,
750
+ 0
751
+ ],
752
+ "bottom_region_index": [
753
+ 0,
754
+ 0,
755
+ 0,
756
+ 1
757
+ ]
758
+ },
759
+ {
760
+ "image": "KiTS-00093/7_arterial_emb_zs0.nii.gz",
761
+ "label": "KiTS-00093/mask_combined_label_zs0_wbdm.nii.gz",
762
+ "fold": 1,
763
+ "dim": [
764
+ 512,
765
+ 512,
766
+ 512
767
+ ],
768
+ "spacing": [
769
+ 1.0,
770
+ 1.0,
771
+ 1.0
772
+ ],
773
+ "top_region_index": [
774
+ 0,
775
+ 1,
776
+ 0,
777
+ 0
778
+ ],
779
+ "bottom_region_index": [
780
+ 0,
781
+ 0,
782
+ 0,
783
+ 1
784
+ ]
785
+ },
786
+ {
787
+ "image": "KiTS-00197/2_arterial_emb_zs0.nii.gz",
788
+ "label": "KiTS-00197/mask_combined_label_zs0_wbdm.nii.gz",
789
+ "fold": 1,
790
+ "dim": [
791
+ 512,
792
+ 512,
793
+ 512
794
+ ],
795
+ "spacing": [
796
+ 1.0,
797
+ 1.0,
798
+ 1.0
799
+ ],
800
+ "top_region_index": [
801
+ 0,
802
+ 1,
803
+ 0,
804
+ 0
805
+ ],
806
+ "bottom_region_index": [
807
+ 0,
808
+ 0,
809
+ 0,
810
+ 1
811
+ ]
812
+ }
813
+ ]
814
+ }
datasets/C4KC-KiTS_subset.zip ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb65d194571c8db8e26ac911b04898cd54376f3c76a0303be70c4f883102088
3
+ size 3155140827
datasets/IntegrationTest-AbdomenCT.nii.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b4a84769a31aeadd5f6d1a2bece82ba138bfb0eabe94ab13894fc8eb5dac90
3
+ size 7493659
datasets/all_masks_flexible_size_and_spacing_3000.zip ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d89ebad0762448eca5b01e7b2e3199439111af50519fd4b8f124bc9e62968eb
3
+ size 9028952285
docs/README.md ADDED
@@ -0,0 +1,129 @@
1
+ # Model Overview
2
+ This bundle is for NVIDIA MAISI (Medical AI for Synthetic Imaging), a 3D latent diffusion model that can generate large CT images with paired segmentation masks, supporting variable volume and voxel sizes as well as controllable organ/tumor size.
3
+
4
+ The inference workflow of MAISI is depicted in the figure below. It first generates latent features from random noise by applying multiple denoising steps using the trained diffusion model. Then it decodes the denoised latent features into images using the trained autoencoder.
5
+
6
+ <p align="center">
7
+ <img src="https://developer.download.nvidia.com/assets/Clara/Images/maisi_workflow_1.0.1.png" alt="MAISI inference scheme">
8
+ </p>
9
+
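+ In code terms, that denoising stage is an iterative scheduler loop. The following is a minimal, self-contained sketch of the control flow only, not the bundle's implementation: the small `Conv3d` is a stand-in for the trained diffusion UNet, and the scheduler settings are copied from `configs/train.json`:
+
+ ```python
+ import torch
+ from monai.networks.schedulers.ddpm import DDPMScheduler
+
+ denoiser = torch.nn.Conv3d(4, 4, kernel_size=3, padding=1)  # placeholder for the trained diffusion UNet
+ scheduler = DDPMScheduler(
+     num_train_timesteps=1000, schedule="scaled_linear_beta",
+     beta_start=0.0015, beta_end=0.0195, clip_sample=False,
+ )
+
+ latent = torch.randn(1, 4, 16, 16, 16)  # random noise in latent space (tiny size for illustration)
+ with torch.no_grad():
+     for t in scheduler.timesteps:  # multiple denoising steps
+         noise_pred = denoiser(latent)  # a real UNet would also take t and conditioning inputs
+         latent, _ = scheduler.step(noise_pred, int(t), latent)
+ # In the bundle, the trained autoencoder then decodes the denoised latent into a CT volume.
+ ```
+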
10
+ MAISI is based on the following papers:
11
+
12
+ [**Latent Diffusion:** Rombach, Robin, et al. "High-resolution image synthesis with latent diffusion models." CVPR 2022.](https://openaccess.thecvf.com/content/CVPR2022/papers/Rombach_High-Resolution_Image_Synthesis_With_Latent_Diffusion_Models_CVPR_2022_paper.pdf)
13
+
14
+ [**ControlNet:** Lvmin Zhang, Anyi Rao, Maneesh Agrawala; “Adding Conditional Control to Text-to-Image Diffusion Models.” ICCV 2023.](https://openaccess.thecvf.com/content/ICCV2023/papers/Zhang_Adding_Conditional_Control_to_Text-to-Image_Diffusion_Models_ICCV_2023_paper.pdf)
15
+
16
+ [**Rectified Flow:** Liu, Xingchao, and Chengyue Gong. "Flow Straight and Fast: Learning to Generate and Transfer Data with Rectified Flow." ICLR 2023.](https://arxiv.org/pdf/2209.03003)
17
+
18
+ #### Example synthetic image
19
+ An example result from inference is shown below:
20
+ ![Example synthetic image](https://developer.download.nvidia.com/assets/Clara/Images/monai_maisi_ct_generative_example_synthetic_data.png)
21
+
22
+ ### Inference configuration
23
+ The inference requires:
24
+ - GPU: at least 58GB GPU memory for 512 x 512 x 512
25
+ - Disk space: at least 21GB of free disk space
26
+
27
+ #### Inference parameters:
28
+ The information for the inference input, like body region and anatomy to generate, is stored in [./configs/inference.json](../configs/inference.json). Please feel free to play with it. Here are the details of the parameters.
29
+
30
+ - `"num_output_samples"`: int, the number of output image/mask pairs it will generate.
31
+ - `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5&times;1.5&times;2.0 mm. The spacing for x and y axes has to be between 0.5 and 3.0 mm and the spacing for the z axis has to be between 0.5 and 5.0 mm.
32
+ - `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with a size of 512&times;512&times;256. The values need to be divisible by 16; if GPU memory is limited, reduce them. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For example, if they are set to `[1.5, 1.5, 2.0]` mm and `[512, 512, 256]`, the FOV is 768&times;768&times;512 mm. We recommend that the FOV be the same in the x and y axes, at least 256mm for the head, at least 384mm for other body regions such as the abdomen, and no larger than 640mm. The output size for the x and y axes can be selected from [256, 384, 512], while for the z axis, it can be chosen from [128, 256, 384, 512, 640, 768].
33
+ - `"controllable_anatomy_size"`: a list of controllable anatomies and their size scales (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain a liver of median size (around the 50th percentile) and a relatively small hepatic tumor (around the 30th percentile). In addition, a size scale of -1 indicates that the organ does not exist or should be removed. The output will contain paired images and segmentation masks for the controllable anatomies.
34
+ The following organs support generation with a controllable size: ``["liver", "gallbladder", "stomach", "pancreas", "colon", "lung tumor", "bone lesion", "hepatic tumor", "colon cancer primaries", "pancreatic tumor"]``.
35
+ The raw output of the current mask generation model has a fixed size of $256^3$ voxels with a spacing of $1.5^3$ mm. If the "output_size" differs from this default, the generated masks will be resampled to the desired `"output_size"` and `"spacing"`. Note that resampling may degrade the quality of the generated masks and could trigger multiple inference attempts if the images fail to pass the [image quality check](../scripts/quality_check.py).
36
+ - `"body_region"`: Deprecated, please leave it as empty `"[]"`.
37
+ - `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json".
38
+ - `"autoencoder_sliding_window_infer_size"`: to save GPU memory, sliding window inference is used when decoding latents into an image if `"output_size"` is large. This parameter is the patch size of the sliding window. A smaller value reduces GPU memory usage but increases inference time. The values need to be divisible by 16.
39
+ - `"autoencoder_sliding_window_infer_overlap"`: float between 0 and 1. A larger value reduces stitching artifacts between patches during sliding window inference, but increases inference time. If you do not observe seam lines in the generated images, you can use a smaller value to save inference time. A combined example of these settings is shown below.
40
+
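+ Putting these together, a plausible set of entries in `configs/inference.json` could look like the following (illustrative values only; the sliding-window settings in particular are assumptions to be tuned to your GPU):
+
+ ```
+ "num_output_samples": 1,
+ "spacing": [1.0, 1.0, 1.0],
+ "output_size": [512, 512, 512],
+ "controllable_anatomy_size": [["liver", 0.5], ["hepatic tumor", 0.3]],
+ "body_region": [],
+ "autoencoder_sliding_window_infer_size": [96, 96, 96],
+ "autoencoder_sliding_window_infer_overlap": 0.6666
+ ```
+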
41
+ To generate images with substantial dimensions, such as 512 &times; 512 &times; 512 or larger, using GPUs with 80GB of memory, it is advisable to configure the `"num_splits"` parameter in [the auto-encoder configuration](./configs/config_maisi.json#L11-L37) to 16. This adjustment is crucial to avoid out-of-memory issues during inference.
42
+
43
+ #### Recommended spacing for different output sizes:
44
+
45
+ |`"output_size"`| Recommended `"spacing"`|
46
+ |:-----:|:-----:|
47
+ |[256, 256, 256] | [1.5, 1.5, 1.5] |
48
+ |[512, 512, 128] | [0.8, 0.8, 2.5] |
49
+ |[512, 512, 512] | [1.0, 1.0, 1.0] |
50
+
51
+ ### Execute inference
52
+ The following command generates a synthetic image from randomly sampled noise.
53
+ ```
54
+ python -m monai.bundle run --config_file configs/inference.json
55
+ ```
56
+
57
+ ## Execute Finetuning
58
+
59
+ ### Training configuration
60
+ The training was performed with the following:
61
+ - GPU: at least 60GB GPU memory for 512 x 512 x 512 volume
62
+ - Actual Model Input (the size of image embedding in latent space): 128 x 128 x 128
63
+ - AMP: True
64
+
65
+ ### Run finetuning:
66
+ This config executes finetuning of the pretrained ControlNet with a new class (i.e., Kidney Tumor). When finetuning with new class names, please update `weighted_loss_label` in `configs/train.json` and `configs/label_dict.json` accordingly. There are 8 dummy labels as placeholders in the default `configs/label_dict.json` that can be used for finetuning.
67
+ ```
68
+ python -m monai.bundle run --config_file configs/train.json
69
+ ```
70
+
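+ For reference, the shipped `configs/train.json` already maps the Kidney Tumor class this way (adjust the label index for your own class):
+
+ ```
+ "weighted_loss_label": [129],
+ "weighted_loss": 100,
+ ```
+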
71
+ ### Override the `train` config to execute multi-GPU training:
72
+
73
+ ```
74
+ torchrun --standalone --nnodes=1 --nproc_per_node=2 -m monai.bundle run --config_file "['configs/train.json','configs/multi_gpu_train.json']"
75
+ ```
76
+
77
+ ### Data:
78
+ The preprocessed subset of the [C4KC-KiTS](https://www.cancerimagingarchive.net/collection/c4kc-kits/) dataset used in this finetuning config is provided in `./datasets/C4KC-KiTS_subset`.
79
+ ```
80
+             |-*arterial*.nii.gz           # original image
81
+             |-*arterial_emb*.nii.gz       # encoded image embedding
82
+ KiTS-000* --|-mask*.nii.gz                # original labels
83
+             |-mask_pseudo_label*.nii.gz   # pseudo labels
84
+             |-mask_combined_label*.nii.gz # combined mask of original and pseudo labels
85
+
86
+ ```
87
+ An example combined mask of original and pseudo labels is shown below:
88
+ ![example_combined_mask](https://developer.download.nvidia.com/assets/Clara/Images/monai_maisi_ct_generative_example_combined_mask.png)
89
+
90
+ Please note that the label of Kidney Tumor is mapped to index `129` in this preprocessed dataset. The encoded image embedding is generated by the provided `Autoencoder` in `./models/autoencoder.pt` during preprocessing, to save memory usage for training. The pseudo labels are generated by [VISTA 3D](https://github.com/Project-MONAI/VISTA). In addition, the dimension of each volume and its corresponding pseudo label is resampled to the closest multiple of 128 (e.g., 128, 256, 384, 512, ...); for example, an axis with 300 voxels is resampled to 256.
91
+
92
+ The training workflow requires one JSON file that specifies the image embedding and segmentation pairs. An example file is located at `./datasets/C4KC-KiTS_subset.json`.
93
+
94
+ The JSON file has the following structure:
95
+ ```python
96
+ {
97
+ "training": [
98
+ {
99
+ "image": "*/*arterial_emb*.nii.gz", # relative path to the image embedding file
100
+ "label": "*/mask_combined_label*.nii.gz", # relative path to the combined label file
101
+ "dim": [512, 512, 512], # the dimension of image
102
+ "spacing": [1.0, 1.0, 1.0], # the spacing of image
103
+ "top_region_index": [0, 1, 0, 0], # the top region index of the image
104
+ "bottom_region_index": [0, 0, 0, 1], # the bottom region index of the image
105
+ "fold": 0 # fold index for cross validation, fold 0 is used for training
106
+ },
107
+
108
+ ...
109
+ ]
110
+ }
111
+ ```
112
+
113
+ # References
114
+ [1] Rombach, Robin, et al. "High-resolution image synthesis with latent diffusion models." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2022. https://openaccess.thecvf.com/content/CVPR2022/papers/Rombach_High-Resolution_Image_Synthesis_With_Latent_Diffusion_Models_CVPR_2022_paper.pdf
115
+
116
+ # License
117
+
118
+ ## Code License
119
+
120
+ This project includes code licensed under the Apache License 2.0.
121
+ You may obtain a copy of the License at
122
+
123
+ http://www.apache.org/licenses/LICENSE-2.0
124
+
125
+ ## Model Weights License
126
+
127
+ The model weights included in this project are licensed under the NCLS v1 License.
128
+
129
+ Both licenses' full texts have been combined into a single `LICENSE` file. Please refer to this `LICENSE` file for more details about the terms and conditions of both licenses.
docs/data_license.txt ADDED
@@ -0,0 +1,49 @@
1
+ Third Party Licenses
2
+ -----------------------------------------------------------------------
3
+
4
+ /*********************************************************************/
5
+ i. Multimodal Brain Tumor Segmentation Challenge 2018
6
+ https://www.med.upenn.edu/sbia/brats2018/data.html
7
+ /*********************************************************************/
8
+
9
+ Data Usage Agreement / Citations
10
+
11
+ You are free to use and/or refer to the BraTS datasets in your own
12
+ research, provided that you always cite the following two manuscripts:
13
+
14
+ [1] Menze BH, Jakab A, Bauer S, Kalpathy-Cramer J, Farahani K, Kirby
15
+ J, Burren Y, Porz N, Slotboom J, Wiest R, Lanczi L, Gerstner E, Weber
16
+ MA, Arbel T, Avants BB, Ayache N, Buendia P, Collins DL, Cordier N,
17
+ Corso JJ, Criminisi A, Das T, Delingette H, Demiralp Ç, Durst CR,
18
+ Dojat M, Doyle S, Festa J, Forbes F, Geremia E, Glocker B, Golland P,
19
+ Guo X, Hamamci A, Iftekharuddin KM, Jena R, John NM, Konukoglu E,
20
+ Lashkari D, Mariz JA, Meier R, Pereira S, Precup D, Price SJ, Raviv
21
+ TR, Reza SM, Ryan M, Sarikaya D, Schwartz L, Shin HC, Shotton J,
22
+ Silva CA, Sousa N, Subbanna NK, Szekely G, Taylor TJ, Thomas OM,
23
+ Tustison NJ, Unal G, Vasseur F, Wintermark M, Ye DH, Zhao L, Zhao B,
24
+ Zikic D, Prastawa M, Reyes M, Van Leemput K. "The Multimodal Brain
25
+ Tumor Image Segmentation Benchmark (BRATS)", IEEE Transactions on
26
+ Medical Imaging 34(10), 1993-2024 (2015) DOI:
27
+ 10.1109/TMI.2014.2377694
28
+
29
+ [2] Bakas S, Akbari H, Sotiras A, Bilello M, Rozycki M, Kirby JS,
30
+ Freymann JB, Farahani K, Davatzikos C. "Advancing The Cancer Genome
31
+ Atlas glioma MRI collections with expert segmentation labels and
32
+ radiomic features", Nature Scientific Data, 4:170117 (2017) DOI:
33
+ 10.1038/sdata.2017.117
34
+
35
+ In addition, if there are no restrictions imposed from the
36
+ journal/conference you submit your paper about citing "Data
37
+ Citations", please be specific and also cite the following:
38
+
39
+ [3] Bakas S, Akbari H, Sotiras A, Bilello M, Rozycki M, Kirby J,
40
+ Freymann J, Farahani K, Davatzikos C. "Segmentation Labels and
41
+ Radiomic Features for the Pre-operative Scans of the TCGA-GBM
42
+ collection", The Cancer Imaging Archive, 2017. DOI:
43
+ 10.7937/K9/TCIA.2017.KLXWJJ1Q
44
+
45
+ [4] Bakas S, Akbari H, Sotiras A, Bilello M, Rozycki M, Kirby J,
46
+ Freymann J, Farahani K, Davatzikos C. "Segmentation Labels and
47
+ Radiomic Features for the Pre-operative Scans of the TCGA-LGG
48
+ collection", The Cancer Imaging Archive, 2017. DOI:
49
+ 10.7937/K9/TCIA.2017.GJQ7R0EF
models/autoencoder.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f8a7a056d0ebc00486edc43c26768bf1c12eaa6df9dd172e34598003be95eb3
3
+ size 83831868
models/controlnet-20datasets-e20wl100fold0bc_noi_dia_fsize_current.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05fda7bfffde524d56cc2dc5b990f901216bc46c4b5e261404aebc409d27b78b
3
+ size 278366962
models/controlnet.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:468c7c1d358530b9ebbdd643e4c1b1c1e4037df154e0bc15d21fc49e56a57f75
3
+ size 288255799
models/diffusion_unet.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc1ec59782f5ac7c0d22dd501654732109a971b93cbaa6607c4106a7f29066f
3
+ size 2166600232
models/input_unet3d_data-all_steps1000size512ddpm_random_current_inputx_v1.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b32be13118f9d6a077d42dd250c09c5e41673b48dbf2f35f2c587a7a9ebe5686
3
+ size 685298858
models/mask_generation_autoencoder.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:539175f6ede3cb1e6f01bfd6347cd446d601cf4a508fc632a1e36362b1428a5d
3
+ size 21072774
models/mask_generation_diffusion_unet.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7d668b1356e9b94b8269decadf8f8116dc4ee2d365580d05349b4ddf6739155
3
+ size 788941780
scripts/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ # Copyright (c) MONAI Consortium
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+
12
+ from . import sample, utils
scripts/augmentation.py ADDED
@@ -0,0 +1,373 @@
1
+ # Copyright (c) MONAI Consortium
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+
12
+ import numpy as np
13
+ import torch
14
+ import torch.nn.functional as F
15
+ from monai.transforms import Rand3DElastic, RandAffine, RandZoom
16
+ from monai.utils import ensure_tuple_rep
17
+
18
+
19
+ def erode3d(input_tensor, erosion=3):
20
+ # Define the structuring element
21
+ erosion = ensure_tuple_rep(erosion, 3)
22
+ structuring_element = torch.ones(1, 1, erosion[0], erosion[1], erosion[2]).to(input_tensor.device)
23
+
24
+ # Pad the input tensor to handle border pixels
25
+ input_padded = F.pad(
26
+ input_tensor.float().unsqueeze(0).unsqueeze(0),
27
+ (erosion[0] // 2, erosion[0] // 2, erosion[1] // 2, erosion[1] // 2, erosion[2] // 2, erosion[2] // 2),
28
+ mode="constant",
29
+ value=1.0,
30
+ )
31
+
32
+ # Apply erosion operation
33
+ output = F.conv3d(input_padded, structuring_element, padding=0)
34
+
35
+ # Set output values based on the minimum value within the structuring element
36
+ output = torch.where(output == torch.sum(structuring_element), 1.0, 0.0)
37
+
38
+ return output.squeeze(0).squeeze(0)
39
+
40
+
41
+ def dilate3d(input_tensor, erosion=3):
42
+ # Define the structuring element
43
+ erosion = ensure_tuple_rep(erosion, 3)
44
+ structuring_element = torch.ones(1, 1, erosion[0], erosion[1], erosion[2]).to(input_tensor.device)
45
+
46
+ # Pad the input tensor to handle border pixels
47
+ input_padded = F.pad(
48
+ input_tensor.float().unsqueeze(0).unsqueeze(0),
49
+ (erosion[0] // 2, erosion[0] // 2, erosion[1] // 2, erosion[1] // 2, erosion[2] // 2, erosion[2] // 2),
50
+ mode="constant",
51
+ value=1.0,
52
+ )
53
+
54
+ # Apply dilation operation
55
+ output = F.conv3d(input_padded, structuring_element, padding=0)
56
+
57
+ # Set output to 1 wherever the structuring element overlaps any foreground voxel
58
+ output = torch.where(output > 0, 1.0, 0.0)
59
+
60
+ return output.squeeze(0).squeeze(0)
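+
+ # Note: applying `dilate3d(x, erosion=k)` followed by `erode3d(x, erosion=k)`, as done
+ # repeatedly in the augmentation functions below, is a morphological closing: it fills
+ # small holes and smooths the boundaries of the distorted tumor/organ masks.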
61
+
62
+
63
+ def augmentation_tumor_bone(pt_nda, output_size, random_seed):
64
+ volume = pt_nda.squeeze(0)
65
+ real_l_volume_ = torch.zeros_like(volume)
66
+ real_l_volume_[volume == 128] = 1
67
+ real_l_volume_ = real_l_volume_.to(torch.uint8)
68
+
69
+ elastic = RandAffine(
70
+ mode="nearest",
71
+ prob=1.0,
72
+ translate_range=(5, 5, 0),
73
+ rotate_range=(0, 0, 0.1),
74
+ scale_range=(0.15, 0.15, 0),
75
+ padding_mode="zeros",
76
+ )
77
+ elastic.set_random_state(seed=random_seed)
78
+
79
+ tumor_size = torch.sum((real_l_volume_ > 0).float())
80
+ ###########################
81
+ # remove predictions in the pseudo label within the real lesion region
82
+ volume[real_l_volume_ > 0] = 200
83
+ ###########################
84
+ if tumor_size > 0:
85
+ # get organ mask
86
+ organ_mask = (
87
+ torch.logical_and(33 <= volume, volume <= 56).float()
88
+ + torch.logical_and(63 <= volume, volume <= 97).float()
89
+ + (volume == 127).float()
90
+ + (volume == 114).float()
91
+ + real_l_volume_
92
+ )
93
+ organ_mask = (organ_mask > 0).float()
94
+ cnt = 0
95
+ while True:
96
+ threshold = 0.8 if cnt < 40 else 0.75
97
+ real_l_volume = real_l_volume_
98
+ # randomly distort the mask
99
+ distorted_mask = elastic((real_l_volume > 0).cuda(), spatial_size=tuple(output_size)).as_tensor()
100
+ real_l_volume = distorted_mask * organ_mask
101
+ cnt += 1
102
+ print(torch.sum(real_l_volume), "|", tumor_size * threshold)
103
+ if torch.sum(real_l_volume) >= tumor_size * threshold:
104
+ real_l_volume = dilate3d(real_l_volume.squeeze(0), erosion=5)
105
+ real_l_volume = erode3d(real_l_volume, erosion=5).unsqueeze(0).to(torch.uint8)
106
+ break
107
+ else:
108
+ real_l_volume = real_l_volume_
109
+
110
+ volume[real_l_volume == 1] = 128
111
+
112
+ pt_nda = volume.unsqueeze(0)
113
+ return pt_nda
114
+
115
+
116
+ def augmentation_tumor_liver(pt_nda, output_size, random_seed):
117
+ volume = pt_nda.squeeze(0)
118
+ real_l_volume_ = torch.zeros_like(volume)
119
+ real_l_volume_[volume == 1] = 1
120
+ real_l_volume_[volume == 26] = 2
121
+ real_l_volume_ = real_l_volume_.to(torch.uint8)
122
+
123
+ elastic = Rand3DElastic(
124
+ mode="nearest",
125
+ prob=1.0,
126
+ sigma_range=(5, 8),
127
+ magnitude_range=(100, 200),
128
+ translate_range=(10, 10, 10),
129
+ rotate_range=(np.pi / 36, np.pi / 36, np.pi / 36),
130
+ scale_range=(0.2, 0.2, 0.2),
131
+ padding_mode="zeros",
132
+ )
133
+ elastic.set_random_state(seed=random_seed)
134
+
135
+ tumor_size = torch.sum(real_l_volume_ == 2)
136
+ ###########################
137
+ # remove predicted organ labels
138
+ volume[volume == 1] = 0
139
+ volume[volume == 26] = 0
140
+ # before moving the tumor mask, fill its original location with organ labels
141
+ volume[real_l_volume_ == 1] = 1
142
+ volume[real_l_volume_ == 2] = 1
143
+ ###########################
144
+ while True:
145
+ real_l_volume = real_l_volume_
146
+ # randomly distort the mask
147
+ real_l_volume = elastic((real_l_volume == 2).cuda(), spatial_size=tuple(output_size)).as_tensor()
148
+ # get organ mask
149
+ organ_mask = (real_l_volume_ == 1).float() + (real_l_volume_ == 2).float()
150
+
151
+ organ_mask = dilate3d(organ_mask.squeeze(0), erosion=5)
152
+ organ_mask = erode3d(organ_mask, erosion=5).unsqueeze(0)
153
+ real_l_volume = real_l_volume * organ_mask
154
+ print(torch.sum(real_l_volume), "|", tumor_size * 0.80)
155
+ if torch.sum(real_l_volume) >= tumor_size * 0.80:
156
+ real_l_volume = dilate3d(real_l_volume.squeeze(0), erosion=5)
157
+ real_l_volume = erode3d(real_l_volume, erosion=5).unsqueeze(0)
158
+ break
159
+
160
+ volume[real_l_volume == 1] = 26
161
+
162
+ pt_nda = volume.unsqueeze(0)
163
+ return pt_nda
164
+
165
+
166
+ def augmentation_tumor_lung(pt_nda, output_size, random_seed):
167
+ volume = pt_nda.squeeze(0)
168
+ real_l_volume_ = torch.zeros_like(volume)
169
+ real_l_volume_[volume == 23] = 1
170
+ real_l_volume_ = real_l_volume_.to(torch.uint8)
171
+
172
+ elastic = Rand3DElastic(
173
+ mode="nearest",
174
+ prob=1.0,
175
+ sigma_range=(5, 8),
176
+ magnitude_range=(100, 200),
177
+ translate_range=(20, 20, 20),
178
+ rotate_range=(np.pi / 36, np.pi / 36, np.pi),
179
+ scale_range=(0.15, 0.15, 0.15),
180
+ padding_mode="zeros",
181
+ )
182
+ elastic.set_random_state(seed=random_seed)
183
+
184
+ tumor_size = torch.sum(real_l_volume_)
185
+ # before moving the lung tumor mask, fill its original location with lung labels
186
+ new_real_l_volume_ = dilate3d(real_l_volume_.squeeze(0), erosion=3)
187
+ new_real_l_volume_ = new_real_l_volume_.unsqueeze(0)
188
+ new_real_l_volume_[real_l_volume_ > 0] = 0
189
+ new_real_l_volume_[volume < 28] = 0
190
+ new_real_l_volume_[volume > 32] = 0
191
+ tmp = volume[(volume * new_real_l_volume_).nonzero(as_tuple=True)].view(-1)
192
+
193
+ mode = torch.mode(tmp, 0)[0].item()
194
+ print(mode)
195
+ assert 28 <= mode <= 32
196
+ volume[real_l_volume_.bool()] = mode
197
+ ###########################
198
+ if tumor_size > 0:
199
+ # aug
200
+ while True:
201
+ real_l_volume = real_l_volume_
202
+ # randomly distort the mask
203
+ real_l_volume = elastic(real_l_volume, spatial_size=tuple(output_size)).as_tensor()
204
+ # get lung mask v2 (133 order)
205
+ lung_mask = (
206
+ (volume == 28).float()
207
+ + (volume == 29).float()
208
+ + (volume == 30).float()
209
+ + (volume == 31).float()
210
+ + (volume == 32).float()
211
+ )
212
+
213
+ lung_mask = dilate3d(lung_mask.squeeze(0), erosion=5)
214
+ lung_mask = erode3d(lung_mask, erosion=5).unsqueeze(0)
215
+ real_l_volume = real_l_volume * lung_mask
216
+ print(torch.sum(real_l_volume), "|", tumor_size * 0.85)
217
+ if torch.sum(real_l_volume) >= tumor_size * 0.85:
218
+ real_l_volume = dilate3d(real_l_volume.squeeze(0), erosion=5)
219
+ real_l_volume = erode3d(real_l_volume, erosion=5).unsqueeze(0).to(torch.uint8)
220
+ break
221
+ else:
222
+ real_l_volume = real_l_volume_
223
+
224
+ volume[real_l_volume == 1] = 23
225
+
226
+ pt_nda = volume.unsqueeze(0)
227
+ return pt_nda
228
+
229
+
230
+ def augmentation_tumor_pancreas(pt_nda, output_size, random_seed):
231
+ volume = pt_nda.squeeze(0)
232
+ real_l_volume_ = torch.zeros_like(volume)
233
+ real_l_volume_[volume == 4] = 1
234
+ real_l_volume_[volume == 24] = 2
235
+ real_l_volume_ = real_l_volume_.to(torch.uint8)
236
+
237
+ elastic = Rand3DElastic(
238
+ mode="nearest",
239
+ prob=1.0,
240
+ sigma_range=(5, 8),
241
+ magnitude_range=(100, 200),
242
+ translate_range=(15, 15, 15),
243
+ rotate_range=(np.pi / 36, np.pi / 36, np.pi / 36),
244
+ scale_range=(0.1, 0.1, 0.1),
245
+ padding_mode="zeros",
246
+ )
247
+ elastic.set_random_state(seed=random_seed)
248
+
249
+ tumor_size = torch.sum(real_l_volume_ == 2)
250
+ ###########################
251
+ # remove predicted organ labels
252
+ volume[volume == 24] = 0
253
+ volume[volume == 4] = 0
254
+ # before moving the tumor mask, fill its original location with organ labels
255
+ volume[real_l_volume_ == 1] = 4
256
+ volume[real_l_volume_ == 2] = 4
257
+ ###########################
258
+ while True:
259
+ real_l_volume = real_l_volume_
260
+ # randomly distort the mask
261
+ real_l_volume = elastic((real_l_volume == 2).cuda(), spatial_size=tuple(output_size)).as_tensor()
262
+ # get organ mask
263
+ organ_mask = (real_l_volume_ == 1).float() + (real_l_volume_ == 2).float()
264
+
265
+ organ_mask = dilate3d(organ_mask.squeeze(0), erosion=5)
266
+ organ_mask = erode3d(organ_mask, erosion=5).unsqueeze(0)
267
+ real_l_volume = real_l_volume * organ_mask
268
+ print(torch.sum(real_l_volume), "|", tumor_size * 0.80)
269
+ if torch.sum(real_l_volume) >= tumor_size * 0.80:
270
+ real_l_volume = dilate3d(real_l_volume.squeeze(0), erosion=5)
271
+ real_l_volume = erode3d(real_l_volume, erosion=5).unsqueeze(0)
272
+ break
273
+
274
+ volume[real_l_volume == 1] = 24
275
+
276
+ pt_nda = volume.unsqueeze(0)
277
+ return pt_nda
278
+
279
+
280
+ def augmentation_tumor_colon(pt_nda, output_size, random_seed):
281
+ volume = pt_nda.squeeze(0)
282
+ real_l_volume_ = torch.zeros_like(volume)
283
+ real_l_volume_[volume == 27] = 1
284
+ real_l_volume_ = real_l_volume_.to(torch.uint8)
285
+
286
+ elastic = Rand3DElastic(
287
+ mode="nearest",
288
+ prob=1.0,
289
+ sigma_range=(5, 8),
290
+ magnitude_range=(100, 200),
291
+ translate_range=(5, 5, 5),
292
+ rotate_range=(np.pi / 36, np.pi / 36, np.pi / 36),
293
+ scale_range=(0.1, 0.1, 0.1),
294
+ padding_mode="zeros",
295
+ )
296
+ elastic.set_random_state(seed=random_seed)
297
+
298
+ tumor_size = torch.sum(real_l_volume_)
299
+ ###########################
300
+ # before moving the tumor mask, fill its original location with organ labels
301
+ volume[real_l_volume_.bool()] = 62
302
+ ###########################
303
+ if tumor_size > 0:
304
+ # get organ mask
305
+ organ_mask = (volume == 62).float()
306
+ organ_mask = dilate3d(organ_mask.squeeze(0), erosion=5)
307
+ organ_mask = erode3d(organ_mask, erosion=5).unsqueeze(0)
308
+ # cnt = 0
309
+ cnt = 0
310
+ while True:
311
+ threshold = 0.8
312
+ real_l_volume = real_l_volume_
313
+ if cnt < 20:
314
+ # randomly distort the mask
315
+ distorted_mask = elastic((real_l_volume == 1).cuda(), spatial_size=tuple(output_size)).as_tensor()
316
+ real_l_volume = distorted_mask * organ_mask
317
+ elif 20 <= cnt < 40:
318
+ threshold = 0.75
319
+ else:
320
+ break
321
+
322
+ real_l_volume = real_l_volume * organ_mask
323
+ print(torch.sum(real_l_volume), "|", tumor_size * threshold)
324
+ cnt += 1
325
+ if torch.sum(real_l_volume) >= tumor_size * threshold:
326
+ real_l_volume = dilate3d(real_l_volume.squeeze(0), erosion=5)
327
+ real_l_volume = erode3d(real_l_volume, erosion=5).unsqueeze(0).to(torch.uint8)
328
+ break
329
+ else:
330
+ real_l_volume = real_l_volume_
331
+ # break
332
+ volume[real_l_volume == 1] = 27
333
+
334
+ pt_nda = volume.unsqueeze(0)
335
+ return pt_nda
336
+
337
+
338
+ def augmentation_body(pt_nda, random_seed):
339
+ volume = pt_nda.squeeze(0)
340
+
341
+ zoom = RandZoom(min_zoom=0.99, max_zoom=1.01, mode="nearest", align_corners=None, prob=1.0)
342
+ zoom.set_random_state(seed=random_seed)
343
+
344
+ volume = zoom(volume)
345
+
346
+ pt_nda = volume.unsqueeze(0)
347
+ return pt_nda
348
+
349
+
350
+ def augmentation(pt_nda, output_size, random_seed):
351
+ label_list = torch.unique(pt_nda)
352
+ label_list = list(label_list.cpu().numpy())
353
+
354
+ if 128 in label_list:
355
+ print("augmenting bone lesion/tumor")
356
+ pt_nda = augmentation_tumor_bone(pt_nda, output_size, random_seed)
357
+ elif 26 in label_list:
358
+ print("augmenting liver tumor")
359
+ pt_nda = augmentation_tumor_liver(pt_nda, output_size, random_seed)
360
+ elif 23 in label_list:
361
+ print("augmenting lung tumor")
362
+ pt_nda = augmentation_tumor_lung(pt_nda, output_size, random_seed)
363
+ elif 24 in label_list:
364
+ print("augmenting pancreas tumor")
365
+ pt_nda = augmentation_tumor_pancreas(pt_nda, output_size, random_seed)
366
+ elif 27 in label_list:
367
+ print("augmenting colon tumor")
368
+ pt_nda = augmentation_tumor_colon(pt_nda, output_size, random_seed)
369
+ else:
370
+ print("augmenting body")
371
+ pt_nda = augmentation_body(pt_nda, random_seed)
372
+
373
+ return pt_nda
scripts/find_masks.py ADDED
@@ -0,0 +1,137 @@
1
+ # Copyright (c) MONAI Consortium
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+
12
+ from __future__ import annotations  # needed for the X | Y annotation syntax on Python < 3.10
13
+ import json
14
+ import os
15
+ from typing import Sequence
16
+
17
+ from monai.apps.utils import extractall
18
+ from monai.utils import ensure_tuple_rep
19
+
20
+
21
+ def convert_body_region(body_region: str | Sequence[str]) -> Sequence[int]:
22
+ """
23
+ Convert body region string to body region index.
24
+ Args:
25
+ body_region: list of input body region string. If single str, will be converted to list of str.
26
+ Return:
27
+ body_region_indices, list of input body region index.
28
+ """
29
+ if isinstance(body_region, str):
30
+ body_region = [body_region]
31
+
32
+ # body region mapping for maisi
33
+ region_mapping_maisi = {
34
+ "head": 0,
35
+ "chest": 1,
36
+ "thorax": 1,
37
+ "chest/thorax": 1,
38
+ "abdomen": 2,
39
+ "pelvis": 3,
40
+ "lower": 3,
41
+ "pelvis/lower": 3,
42
+ }
43
+
44
+ # perform mapping
45
+ body_region_indices = []
46
+ for region in body_region:
47
+ normalized_region = region.lower() # norm str to lower case
48
+ if normalized_region not in region_mapping_maisi:
49
+ raise ValueError(f"Invalid region: {normalized_region}")
50
+ body_region_indices.append(region_mapping_maisi[normalized_region])
51
+
52
+ return body_region_indices
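+
+ # Example: convert_body_region(["chest", "abdomen"]) returns [1, 2].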
53
+
54
+
55
+ def find_masks(
56
+ anatomy_list: int | Sequence[int],
57
+ spacing: Sequence[float] | float = 1.0,
58
+ output_size: Sequence[int] = (512, 512, 512),
59
+ check_spacing_and_output_size: bool = False,
60
+ database_filepath: str = "./configs/database.json",
61
+ mask_foldername: str = "./datasets/masks/",
62
+ ):
63
+ """
64
+ Find candidate masks that fulfill all the requirements.
65
+ They should contain all the anatomies in `anatomy_list`.
66
+ If there is no tumor specified in `anatomy_list`, we also expect the candidate masks to be tumor free.
67
+ If check_spacing_and_output_size is True, the candidate masks need to have the expected `spacing` and `output_size`.
68
+ Args:
69
+ anatomy_list: list of input anatomy. The found candidate mask will include these anatomies.
70
+ spacing: list of three floats, voxel spacing. If providing a single number, will use it for all the three dimensions.
71
+ output_size: list of three int, expected candidate mask spatial size.
72
+ check_spacing_and_output_size: whether we expect candidate mask to have spatial size of `output_size`
73
+ and voxel size of `spacing`.
74
+ database_filepath: path for the json file that stores the information of all the candidate masks.
75
+ mask_foldername: directory that saves all the candidate masks.
76
+ Return:
77
+ candidate_masks, list of dict, each dict contains information of one candidate mask that fulfills all the requirements.
78
+ """
79
+ # check and preprocess input
80
+ if isinstance(anatomy_list, int):
81
+ anatomy_list = [anatomy_list]
82
+
83
+ spacing = ensure_tuple_rep(spacing, 3)
84
+
85
+ if not os.path.exists(mask_foldername):
86
+ zip_file_path = mask_foldername + ".zip"
87
+
88
+ if not os.path.isfile(zip_file_path):
89
+ raise ValueError(f"Please download {zip_file_path} following the instruction in ./datasets/README.md.")
90
+
91
+ print(f"Extracting {zip_file_path} to {os.path.dirname(zip_file_path)}")
92
+ extractall(filepath=zip_file_path, output_dir=os.path.dirname(zip_file_path), file_type="zip")
93
+ print(f"Unzipped {zip_file_path} to {mask_foldername}.")
94
+
95
+ if not os.path.isfile(database_filepath):
96
+ raise ValueError(f"Please download {database_filepath} following the instruction in ./datasets/README.md.")
97
+ with open(database_filepath, "r") as f:
98
+ db = json.load(f)
99
+
100
+ # select candidate_masks
101
+ candidate_masks = []
102
+ for _item in db:
103
+ if not set(anatomy_list).issubset(_item["label_list"]):
104
+ continue
105
+
106
+ # whether to keep this mask, default to be True.
107
+ keep_mask = True
108
+
109
+ for tumor_label in [23, 24, 26, 27, 128]:
110
+ # we skip those mask with tumors if users do not provide tumor label in anatomy_list
111
+ if tumor_label not in anatomy_list and tumor_label in _item["label_list"]:
112
+ keep_mask = False
113
+
114
+ if check_spacing_and_output_size:
115
+ # if the output_size and spacing are different with user's input, skip it
116
+ for axis in range(3):
117
+ if _item["dim"][axis] != output_size[axis] or _item["spacing"][axis] != spacing[axis]:
118
+ keep_mask = False
119
+
120
+ if keep_mask:
121
+ # if decide to keep this mask, we pack the information of this mask and add to final output.
122
+ candidate = {
123
+ "pseudo_label": os.path.join(mask_foldername, _item["pseudo_label_filename"]),
124
+ "spacing": _item["spacing"],
125
+ "dim": _item["dim"],
126
+ }
127
+
128
+ # Conditionally add the label to the candidate dictionary
129
+ if "label_filename" in _item:
130
+ candidate["label"] = os.path.join(mask_foldername, _item["label_filename"])
131
+
132
+ candidate_masks.append(candidate)
133
+
134
+ if len(candidate_masks) == 0 and not check_spacing_and_output_size:
135
+ raise ValueError("Cannot find body region with given anatomy list.")
136
+
137
+ return candidate_masks
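+
+ # Example usage (hypothetical label indices; see configs/label_dict.json for the full mapping):
+ # candidates = find_masks(anatomy_list=[1], spacing=1.0, output_size=(512, 512, 512))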
scripts/quality_check.py ADDED
@@ -0,0 +1,149 @@
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import numpy as np
+
+
+ def get_masked_data(label_data, image_data, labels):
+     """
+     Extract and return the image data corresponding to specified labels within a 3D volume.
+
+     This function masks the `image_data` array based on the provided `labels` in the `label_data` array.
+     It handles both large and small numbers of labels, choosing the faster strategy accordingly.
+
+     Args:
+         label_data (np.ndarray): A NumPy array containing label data, representing different anatomical
+             regions or classes in a 3D medical image.
+         image_data (np.ndarray): A NumPy array containing the image data from which the relevant regions
+             will be extracted.
+         labels (list of int): A list of integers representing the label values to be used for masking.
+
+     Returns:
+         np.ndarray: A NumPy array containing the elements of `image_data` that correspond to the specified
+             labels in `label_data`. If no labels are provided, an empty array is returned.
+
+     Raises:
+         ValueError: If `image_data` and `label_data` do not have the same shape.
+
+     Example:
+         label_int_dict = {"liver": [1], "kidney": [5, 14]}
+         masked_data = get_masked_data(label_data, image_data, label_int_dict["kidney"])
+     """
+
+     # Check that the shapes of image_data and label_data match
+     if image_data.shape != label_data.shape:
+         raise ValueError(
+             f"Shape mismatch: image_data has shape {image_data.shape}, "
+             f"but label_data has shape {label_data.shape}. They must be the same."
+         )
+
+     if not labels:
+         return np.array([])  # Return an empty array if no labels are provided
+
+     labels = list(set(labels))  # remove duplicate items
+
+     # Optimize performance based on the number of labels
+     num_label_acceleration_thresh = 3
+     if len(labels) >= num_label_acceleration_thresh:
+         # with many labels, np.isin is faster
+         mask = np.isin(label_data, labels)
+     else:
+         # with few labels, combining equality masks via logical OR is faster
+         mask = np.zeros_like(label_data, dtype=bool)
+         for label in labels:
+             mask = np.logical_or(mask, label_data == label)
+
+     # Retrieve the masked data (mask is already boolean)
+     masked_data = image_data[mask]
+
+     return masked_data
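A quick self-contained sanity check of `get_masked_data`, using tiny synthetic arrays (purely illustrative):

```python
import numpy as np

label_data = np.zeros((4, 4, 4), dtype=np.int32)
label_data[0, 0, :2] = 5  # two pretend-kidney voxels
image_data = np.full((4, 4, 4), 40.0)

values = get_masked_data(label_data, image_data, labels=[5, 14])
assert values.shape == (2,) and np.allclose(values, 40.0)
```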
+
+
+ def is_outlier(statistics, image_data, label_data, label_int_dict):
+     """
+     Perform a quality check on the generated image by comparing its statistics with precomputed thresholds.
+
+     Args:
+         statistics (dict): Dictionary containing precomputed statistics, including mean +/- 3*sigma ranges
+             and percentile values.
+         image_data (np.ndarray): The image data to be checked, typically a 3D NumPy array.
+         label_data (np.ndarray): The label data corresponding to the image, used for masking regions of interest.
+         label_int_dict (dict): Dictionary mapping label names to their corresponding integer lists,
+             e.g., label_int_dict = {"liver": [1], "kidney": [5, 14]}.
+
+     Returns:
+         dict: A dictionary with labels as keys, each containing the quality check result,
+             including whether it's an outlier, the median value, and the thresholds used.
+             If no data is found for a label, the median value will be `None` and `is_outlier` will be `False`.
+
+     Example:
+         # Example input data (each entry needs both the sigma and the percentile keys)
+         statistics = {
+             "liver": {
+                 "sigma_6_low": -21.596463547885904,
+                 "sigma_6_high": 156.27881534763367,
+                 "percentile_0_5": -10.0,
+                 "percentile_99_5": 120.0,
+             },
+             "kidney": {
+                 "sigma_6_low": -15.0,
+                 "sigma_6_high": 120.0,
+                 "percentile_0_5": -10.0,
+                 "percentile_99_5": 100.0,
+             },
+         }
+         label_int_dict = {"liver": [1], "kidney": [5, 14]}
+         image_data = np.random.rand(100, 100, 100)  # Replace with actual image data
+         label_data = np.zeros((100, 100, 100))  # Replace with actual label data
+         label_data[40:60, 40:60, 40:60] = 1  # Example region for liver
+         label_data[70:90, 70:90, 70:90] = 5  # Example region for kidney
+         result = is_outlier(statistics, image_data, label_data, label_int_dict)
+     """
+     outlier_results = {}
+
+     for label_name, stats in statistics.items():
+         # Get the thresholds from the statistics
+         low_thresh = min(stats["sigma_6_low"], stats["percentile_0_5"])  # or "sigma_12_low" depending on your needs
+         high_thresh = max(stats["sigma_6_high"], stats["percentile_99_5"])  # or "sigma_12_high" depending on your needs
+
+         if label_name == "bone":
+             high_thresh = 1000.0
+
+         # Retrieve the corresponding label integers
+         labels = label_int_dict.get(label_name, [])
+         masked_data = get_masked_data(label_data, image_data, labels)
+         masked_data = masked_data[~np.isnan(masked_data)]
+
+         if masked_data.size == 0:
+             outlier_results[label_name] = {
+                 "is_outlier": False,
+                 "median_value": None,
+                 "low_thresh": low_thresh,
+                 "high_thresh": high_thresh,
+             }
+             continue
+
+         # Compute the median of the masked region
+         median_value = np.nanmedian(masked_data)
+
+         if np.isnan(median_value):
+             median_value = None
+             is_outlier = False
+         else:
+             # Determine whether the median value is an outlier
+             is_outlier = median_value < low_thresh or median_value > high_thresh
+
+         outlier_results[label_name] = {
+             "is_outlier": is_outlier,
+             "median_value": median_value,
+             "low_thresh": low_thresh,
+             "high_thresh": high_thresh,
+         }
+
+     return outlier_results
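Interpreting the returned dictionary, a short sketch (assumes `statistics`, `image_data`, `label_data`, and `label_int_dict` prepared as in the docstring example above):

```python
result = is_outlier(statistics, image_data, label_data, label_int_dict)
failed = [name for name, r in result.items() if r["is_outlier"]]
if failed:
    print("labels failing the quality check:", failed)
```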
scripts/rectified_flow.py ADDED
@@ -0,0 +1,163 @@
+ from typing import Any
+
+ import numpy as np
+ import torch
+ from monai.networks.schedulers import Scheduler
+ from torch.distributions import LogisticNormal
+
+ # code modified from https://github.com/hpcaitech/Open-Sora/blob/main/opensora/schedulers/rf/rectified_flow.py
+
+
+ def timestep_transform(
+     t, input_img_size, base_img_size=32 * 32 * 32, scale=1.0, num_train_timesteps=1000, spatial_dim=3
+ ):
+     t = t / num_train_timesteps
+     ratio_space = (input_img_size / base_img_size).pow(1.0 / spatial_dim)
+
+     ratio = ratio_space * scale
+     new_t = ratio * t / (1 + (ratio - 1) * t)
+
+     new_t = new_t * num_train_timesteps
+     return new_t
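A small numeric check of the transform: larger volumes get their timesteps shifted toward the noisy end of the schedule, while a volume at the base size is unchanged. The values below are illustrative only:

```python
import torch

t = torch.tensor([500.0])  # halfway through a 1000-step schedule
same = timestep_transform(t, input_img_size=torch.tensor(32 * 32 * 32))  # ratio 1 -> unchanged, 500.0
big = timestep_transform(t, input_img_size=torch.tensor(64 * 64 * 64))   # ratio 2 -> 2*0.5/(1+0.5)*1000 ~ 666.7
print(same.item(), big.item())
```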
+
+
+ class RFlowScheduler(Scheduler):
+     def __init__(
+         self,
+         num_train_timesteps=1000,
+         num_inference_steps=10,
+         use_discrete_timesteps=False,
+         sample_method="uniform",
+         loc=0.0,
+         scale=1.0,
+         use_timestep_transform=False,
+         transform_scale=1.0,
+         steps_offset: int = 0,
+     ):
+         self.num_train_timesteps = num_train_timesteps
+         self.num_inference_steps = num_inference_steps
+         self.use_discrete_timesteps = use_discrete_timesteps
+
+         # sample method
+         assert sample_method in ["uniform", "logit-normal"]
+         # assert (
+         #     sample_method == "uniform" or not use_discrete_timesteps
+         # ), "Only uniform sampling is supported for discrete timesteps"
+         self.sample_method = sample_method
+         if sample_method == "logit-normal":
+             self.distribution = LogisticNormal(torch.tensor([loc]), torch.tensor([scale]))
+             self.sample_t = lambda x: self.distribution.sample((x.shape[0],))[:, 0].to(x.device)
+
+         # timestep transform
+         self.use_timestep_transform = use_timestep_transform
+         self.transform_scale = transform_scale
+         self.steps_offset = steps_offset
+
+     def add_noise(
+         self, original_samples: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
+     ) -> torch.FloatTensor:
+         """
+         Compatible with the diffusers `add_noise()` API.
+         """
+         timepoints = timesteps.float() / self.num_train_timesteps
+         timepoints = 1 - timepoints  # maps t in [0, T-1] to an interpolation weight in (0, 1]
+
+         # timepoints: (bsz,); noise: (bsz, channel, frame, w, h)
+         # expand timepoints to the noise shape
+         timepoints = timepoints.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1)
+         timepoints = timepoints.repeat(1, noise.shape[1], noise.shape[2], noise.shape[3], noise.shape[4])
+
+         return timepoints * original_samples + (1 - timepoints) * noise
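An endpoint sanity check of the linear (rectified-flow) interpolation, a sketch with toy tensors: at the largest timestep the output is almost pure noise, and at t = 0 it is the clean sample:

```python
import torch

sched = RFlowScheduler(num_train_timesteps=1000)
x0 = torch.ones(1, 4, 8, 8, 8)
noise = torch.zeros_like(x0)

almost_noise = sched.add_noise(x0, noise, torch.tensor([999]))  # weight 0.001 -> ~noise
clean = sched.add_noise(x0, noise, torch.tensor([0]))           # weight 1.0 -> x0
print(almost_noise.mean().item(), clean.mean().item())  # ~0.001 and 1.0
```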
+
+     def set_timesteps(
+         self,
+         num_inference_steps: int,
+         device: str | torch.device | None = None,
+         input_img_size: int | None = None,
+         base_img_size: int = 32 * 32 * 32,
+     ) -> None:
+         """
+         Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.
+
+         Args:
+             num_inference_steps: number of diffusion steps used when generating samples with a pre-trained model.
+             device: target device to put the data.
+             input_img_size: int, H*W*D of the image, used when `self.use_timestep_transform` is True.
+             base_img_size: int, reference H*W*D size, used when `self.use_timestep_transform` is True.
+         """
+         if num_inference_steps > self.num_train_timesteps:
+             raise ValueError(
+                 f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.num_train_timesteps`:"
+                 f" {self.num_train_timesteps} as the unet model trained with this scheduler can only handle"
+                 f" maximal {self.num_train_timesteps} timesteps."
+             )
+
+         self.num_inference_steps = num_inference_steps
+         # prepare timesteps
+         timesteps = [
+             (1.0 - i / self.num_inference_steps) * self.num_train_timesteps for i in range(self.num_inference_steps)
+         ]
+         if self.use_discrete_timesteps:
+             timesteps = [int(round(t)) for t in timesteps]
+         if self.use_timestep_transform:
+             timesteps = [
+                 timestep_transform(
+                     t,
+                     input_img_size=input_img_size,
+                     base_img_size=base_img_size,
+                     num_train_timesteps=self.num_train_timesteps,
+                 )
+                 for t in timesteps
+             ]
+         timesteps = np.array(timesteps).astype(np.float16)
+         if self.use_discrete_timesteps:
+             timesteps = timesteps.astype(np.int64)
+         self.timesteps = torch.from_numpy(timesteps).to(device)
+         self.timesteps += self.steps_offset
+         print(self.timesteps)
+
+     def sample_timesteps(self, x_start):
+         if self.sample_method == "uniform":
+             t = torch.rand((x_start.shape[0],), device=x_start.device) * self.num_train_timesteps
+         elif self.sample_method == "logit-normal":
+             t = self.sample_t(x_start) * self.num_train_timesteps
+
+         if self.use_discrete_timesteps:
+             t = t.long()
+
+         if self.use_timestep_transform:
+             input_img_size = torch.prod(torch.tensor(x_start.shape[-3:]))
+             base_img_size = 32 * 32 * 32
+             t = timestep_transform(
+                 t,
+                 input_img_size=input_img_size,
+                 base_img_size=base_img_size,
+                 num_train_timesteps=self.num_train_timesteps,
+             )
+
+         return t
+
+     def step(
+         self, model_output: torch.Tensor, timestep: int, sample: torch.Tensor, next_timestep=None
+     ) -> tuple[torch.Tensor, Any]:
+         """
+         Predict the sample at the previous timestep. Core function to propagate the diffusion
+         process from the learned model outputs.
+
+         Args:
+             model_output: direct output from the learned diffusion model (the predicted velocity).
+             timestep: current discrete timestep in the diffusion chain.
+             sample: current instance of the sample being created by the diffusion process.
+             next_timestep: optional next timestep; when given, the Euler step size is
+                 `(timestep - next_timestep) / num_train_timesteps` instead of `1 / num_inference_steps`.
+
+         Returns:
+             pred_prev_sample: predicted previous sample
+             None
+         """
+         v_pred = model_output
+         if next_timestep is None:
+             dt = 1.0 / self.num_inference_steps
+         else:
+             dt = timestep - next_timestep
+             dt = dt / self.num_train_timesteps
+         z = sample + v_pred * dt
+
+         return z, None
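Putting the scheduler together, a minimal Euler sampling loop mirroring the one in `scripts/sample.py` (here `velocity_model` is a hypothetical stand-in for the trained diffusion UNet; shapes are illustrative):

```python
import torch

scheduler = RFlowScheduler(num_train_timesteps=1000, use_timestep_transform=True)
latents = torch.randn(1, 4, 16, 16, 16)
scheduler.set_timesteps(
    num_inference_steps=10, input_img_size=torch.prod(torch.tensor(latents.shape[-3:]))
)

next_ts = torch.cat((scheduler.timesteps[1:], torch.tensor([0], dtype=scheduler.timesteps.dtype)))
for t, next_t in zip(scheduler.timesteps, next_ts):
    v = velocity_model(latents, t)  # hypothetical model call
    latents, _ = scheduler.step(v, t, latents, next_timestep=next_t)
```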
scripts/sample.py ADDED
@@ -0,0 +1,1036 @@
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import json
+ import logging
+ import os
+ import random
+ import time
+ from datetime import datetime
+
+ import monai
+ import torch
+ from monai.data import MetaTensor
+ from monai.inferers.inferer import DiffusionInferer, SlidingWindowInferer
+ from monai.transforms import Compose, SaveImage
+ from monai.utils import set_determinism
+ from tqdm import tqdm
+
+ from .augmentation import augmentation
+ from .find_masks import find_masks
+ from .quality_check import is_outlier
+ from .utils import binarize_labels, dynamic_infer, general_mask_generation_post_process, remap_labels
+
+ modality_mapping = {
+     "unknown": 0,
+     "ct": 1,
+     "ct_wo_contrast": 2,
+     "ct_contrast": 3,
+     "mri": 8,
+     "mri_t1": 9,
+     "mri_t2": 10,
+     "mri_flair": 11,
+     "mri_pd": 12,
+     "mri_dwi": 13,
+     "mri_adc": 14,
+     "mri_ssfp": 15,
+     "mri_mra": 16,
+ }  # the current version only supports "ct"
+
+
+ class ReconModel(torch.nn.Module):
+     """
+     A PyTorch module for reconstructing images from latent representations.
+
+     Attributes:
+         autoencoder: The autoencoder model used for decoding.
+         scale_factor: Scaling factor applied to the input before decoding.
+     """
+
+     def __init__(self, autoencoder, scale_factor):
+         super().__init__()
+         self.autoencoder = autoencoder
+         self.scale_factor = scale_factor
+
+     def forward(self, z):
+         """
+         Decode the input latent representation to an image.
+
+         Args:
+             z (torch.Tensor): The input latent representation.
+
+         Returns:
+             torch.Tensor: The reconstructed image.
+         """
+         recon_pt_nda = self.autoencoder.decode_stage_2_outputs(z / self.scale_factor)
+         return recon_pt_nda
+
+
+ def initialize_noise_latents(latent_shape, device):
+     """
+     Initialize random noise latents for image generation with float16.
+
+     Args:
+         latent_shape (tuple): The shape of the latent space.
+         device (torch.device): The device to create the tensor on.
+
+     Returns:
+         torch.Tensor: Initialized noise latents.
+     """
+     return torch.randn([1] + list(latent_shape)).half().to(device)
+
+
+ def ldm_conditional_sample_one_mask(
+     autoencoder,
+     diffusion_unet,
+     noise_scheduler,
+     scale_factor,
+     anatomy_size,
+     device,
+     latent_shape,
+     label_dict_remap_json,
+     num_inference_steps=1000,
+     autoencoder_sliding_window_infer_size=(96, 96, 96),
+     autoencoder_sliding_window_infer_overlap=0.6667,
+ ):
+     """
+     Generate a single synthetic mask using a latent diffusion model.
+
+     Args:
+         autoencoder (nn.Module): The autoencoder model.
+         diffusion_unet (nn.Module): The diffusion U-Net model.
+         noise_scheduler: The noise scheduler for the diffusion process.
+         scale_factor (float): Scaling factor for the latent space.
+         anatomy_size (torch.Tensor): Tensor specifying the desired anatomy sizes.
+         device (torch.device): The device to run the computation on.
+         latent_shape (tuple): The shape of the latent space.
+         label_dict_remap_json (str): Path to the JSON file for label remapping.
+         num_inference_steps (int): Number of inference steps for the diffusion process.
+         autoencoder_sliding_window_infer_size (list, optional): Size of the sliding window for inference. Defaults to [96, 96, 96].
+         autoencoder_sliding_window_infer_overlap (float, optional): Overlap ratio for sliding window inference. Defaults to 0.6667.
+
+     Returns:
+         torch.Tensor: The generated synthetic mask.
+     """
+     recon_model = ReconModel(autoencoder=autoencoder, scale_factor=scale_factor).to(device)
+
+     with torch.no_grad(), torch.amp.autocast("cuda"):
+         # Generate random noise
+         latents = initialize_noise_latents(latent_shape, device)
+         anatomy_size = torch.FloatTensor(anatomy_size).unsqueeze(0).unsqueeze(0).half().to(device)
+         # synthesize latents
+         noise_scheduler.set_timesteps(num_inference_steps=num_inference_steps)
+         inferer_ddpm = DiffusionInferer(noise_scheduler)
+         latents = inferer_ddpm.sample(
+             input_noise=latents,
+             diffusion_model=diffusion_unet,
+             scheduler=noise_scheduler,
+             verbose=True,
+             conditioning=anatomy_size.to(device),
+         )
+         # decode latents into synthesized masks
+         inferer = SlidingWindowInferer(
+             roi_size=autoencoder_sliding_window_infer_size,
+             sw_batch_size=1,
+             progress=True,
+             mode="gaussian",
+             overlap=autoencoder_sliding_window_infer_overlap,
+             device=torch.device("cpu"),
+             sw_device=device,
+         )
+         synthetic_mask = dynamic_infer(inferer, recon_model, latents)
+         synthetic_mask = torch.softmax(synthetic_mask, dim=1)
+         synthetic_mask = torch.argmax(synthetic_mask, dim=1, keepdim=True)
+         # map raw indices to the 132-label dictionary
+         synthetic_mask = remap_labels(synthetic_mask, label_dict_remap_json)
+
+         # post-processing
+         data = synthetic_mask.squeeze().cpu().detach().numpy()
+
+         labels = [23, 24, 26, 27, 128]
+         target_tumor_label = None
+         for index, size in enumerate(anatomy_size[0, 0, 5:10]):
+             if size.item() != -1.0:
+                 target_tumor_label = labels[index]
+
+         logging.info(f"target_tumor_label for postprocess: {target_tumor_label}")
+         data = general_mask_generation_post_process(data, target_tumor_label=target_tumor_label, device=device)
+         synthetic_mask = torch.from_numpy(data).unsqueeze(0).unsqueeze(0).to(device)
+
+     return synthetic_mask
+
+
+ def ldm_conditional_sample_one_image(
+     autoencoder,
+     diffusion_unet,
+     controlnet,
+     noise_scheduler,
+     scale_factor,
+     device,
+     combine_label_or,
+     modality_tensor,
+     spacing_tensor,
+     latent_shape,
+     output_size,
+     noise_factor,
+     num_inference_steps=1000,
+     autoencoder_sliding_window_infer_size=(96, 96, 96),
+     autoencoder_sliding_window_infer_overlap=0.6667,
+ ):
+     """
+     Generate a single synthetic image using a latent diffusion model with controlnet.
+
+     Args:
+         autoencoder (nn.Module): The autoencoder model.
+         diffusion_unet (nn.Module): The diffusion U-Net model.
+         controlnet (nn.Module): The controlnet model.
+         noise_scheduler: The noise scheduler for the diffusion process.
+         scale_factor (float): Scaling factor for the latent space.
+         device (torch.device): The device to run the computation on.
+         combine_label_or (torch.Tensor): The combined label tensor.
+         modality_tensor (torch.Tensor): Tensor specifying the imaging modality.
+         spacing_tensor (torch.Tensor): Tensor specifying the spacing.
+         latent_shape (tuple): The shape of the latent space.
+         output_size (tuple): The desired output size of the image.
+         noise_factor (float): Factor to scale the initial noise.
+         num_inference_steps (int): Number of inference steps for the diffusion process.
+         autoencoder_sliding_window_infer_size (list, optional): Size of the sliding window for inference. Defaults to [96, 96, 96].
+         autoencoder_sliding_window_infer_overlap (float, optional): Overlap ratio for sliding window inference. Defaults to 0.6667.
+
+     Returns:
+         tuple: A tuple containing the synthetic image and its corresponding label.
+     """
+     # CT image intensity range
+     a_min = -1000
+     a_max = 1000
+     # autoencoder output intensity range
+     b_min = 0.0
+     b_max = 1.0
+
+     recon_model = ReconModel(autoencoder=autoencoder, scale_factor=scale_factor).to(device)
+
+     with torch.no_grad(), torch.amp.autocast("cuda", enabled=True):
+         logging.info("---- Start generating latent features... ----")
+         start_time = time.time()
+         # prepare the segmentation mask condition
+         combine_label = combine_label_or.to(device)
+         if (
+             output_size[0] != combine_label.shape[2]
+             or output_size[1] != combine_label.shape[3]
+             or output_size[2] != combine_label.shape[4]
+         ):
+             logging.info(
+                 "The mask shape does not match output_size; the mask will be interpolated to "
+                 "output_size. The resulting image may be of low quality."
+             )
+             combine_label = torch.nn.functional.interpolate(combine_label, size=output_size, mode="nearest")
+
+         controlnet_cond_vis = binarize_labels(combine_label.as_tensor().long()).half()
+
+         # Generate random noise
+         latents = initialize_noise_latents(latent_shape, device) * noise_factor
+
+         # synthesize latents
+         noise_scheduler.set_timesteps(
+             num_inference_steps=num_inference_steps, input_img_size=torch.prod(torch.tensor(latent_shape[-3:]))
+         )
+         guidance_scale = 0  # API for classifier-free guidance, not used in this version
+         all_next_timesteps = torch.cat(
+             (noise_scheduler.timesteps[1:], torch.tensor([0], dtype=noise_scheduler.timesteps.dtype))
+         )
+         for t, next_t in tqdm(
+             zip(noise_scheduler.timesteps, all_next_timesteps),
+             total=min(len(noise_scheduler.timesteps), len(all_next_timesteps)),
+         ):
+             timesteps = torch.Tensor((t,)).to(device)
+             if guidance_scale == 0:
+                 down_block_res_samples, mid_block_res_sample = controlnet(
+                     x=latents, timesteps=timesteps, controlnet_cond=controlnet_cond_vis, class_labels=modality_tensor
+                 )
+                 predicted_velocity = diffusion_unet(
+                     x=latents,
+                     timesteps=timesteps,
+                     spacing_tensor=spacing_tensor,
+                     class_labels=modality_tensor,
+                     down_block_additional_residuals=down_block_res_samples,
+                     mid_block_additional_residual=mid_block_res_sample,
+                 )
+             else:
+                 down_block_res_samples, mid_block_res_sample = controlnet(
+                     x=torch.cat([latents] * 2),
+                     timesteps=torch.cat([timesteps] * 2),
+                     controlnet_cond=torch.cat([controlnet_cond_vis] * 2),
+                     class_labels=torch.cat([modality_tensor, torch.zeros_like(modality_tensor)]),
+                 )
+                 model_t, model_uncond = diffusion_unet(
+                     x=torch.cat([latents] * 2),
+                     timesteps=torch.cat([timesteps] * 2),
+                     spacing_tensor=torch.cat([spacing_tensor] * 2),
+                     class_labels=torch.cat([modality_tensor, torch.zeros_like(modality_tensor)]),
+                     down_block_additional_residuals=down_block_res_samples,
+                     mid_block_additional_residual=mid_block_res_sample,
+                 ).chunk(2)
+                 predicted_velocity = model_uncond + guidance_scale * (model_t - model_uncond)
+             latents, _ = noise_scheduler.step(predicted_velocity, t, latents, next_timestep=next_t)
+         end_time = time.time()
+         logging.info(f"---- Latent features generation time: {end_time - start_time} seconds ----")
+         del predicted_velocity
+         torch.cuda.empty_cache()
+
+         # decode latents into synthesized images
+         logging.info("---- Start decoding latent features into images... ----")
+         inferer = SlidingWindowInferer(
+             roi_size=autoencoder_sliding_window_infer_size,
+             sw_batch_size=1,
+             progress=True,
+             mode="gaussian",
+             overlap=autoencoder_sliding_window_infer_overlap,
+             device=torch.device("cpu"),
+             sw_device=device,
+         )
+         start_time = time.time()
+         synthetic_images = dynamic_infer(inferer, recon_model, latents)
+         synthetic_images = torch.clip(synthetic_images, b_min, b_max).cpu()
+         end_time = time.time()
+         logging.info(f"---- Image decoding time: {end_time - start_time} seconds ----")
+
+         # post-processing:
+         # project output to [0, 1]
+         synthetic_images = (synthetic_images - b_min) / (b_max - b_min)
+         # project output to [-1000, 1000]
+         synthetic_images = synthetic_images * (a_max - a_min) + a_min
+         # regularize background intensities
+         synthetic_images = crop_img_body_mask(synthetic_images, combine_label)
+         torch.cuda.empty_cache()
+
+     return synthetic_images, combine_label
+
+
+ def filter_mask_with_organs(combine_label, anatomy_list):
+     """
+     Filter a mask to only include specified organs.
+
+     Args:
+         combine_label (torch.Tensor): The input mask.
+         anatomy_list (list): List of organ labels to keep.
+
+     Returns:
+         torch.Tensor: The filtered mask.
+     """
+     # the final output mask file has shape output_size and contains only labels in anatomy_list;
+     # it is already interpolated to the target size
+     combine_label = combine_label.long()
+     # filter out the organs that are not in anatomy_list
+     for i in range(len(anatomy_list)):
+         organ = anatomy_list[i]
+         # replace the kept organ with a negative value so it survives the zero-out step below
+         combine_label[combine_label == organ] = -(i + 1)
+     # zero out voxels whose value is not in anatomy_list
+     combine_label[combine_label > 0] = 0
+     # flip the kept organs back to positive values
+     combine_label = -combine_label
+     return combine_label
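Note that the negative-value trick above both filters and re-indexes: kept organs come back as their 1-based position in `anatomy_list`, not their original label. A tiny illustration:

```python
import torch

mask = torch.tensor([[0, 1, 5, 9]])  # voxels labeled background, 1, 5, 9
out = filter_mask_with_organs(mask.clone(), anatomy_list=[5, 1])
print(out)  # tensor([[0, 2, 1, 0]]): label 5 -> 1, label 1 -> 2, label 9 dropped
```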
+
+
+ def crop_img_body_mask(synthetic_images, combine_label):
+     """
+     Mask the synthetic image with the body mask: voxels outside the body are set to air (-1000 HU).
+
+     Args:
+         synthetic_images (torch.Tensor): The synthetic images.
+         combine_label (torch.Tensor): The body mask.
+
+     Returns:
+         torch.Tensor: The masked synthetic images.
+     """
+     synthetic_images[combine_label == 0] = -1000
+     return synthetic_images
+
+
+ def check_input(body_region, anatomy_list, label_dict_json, output_size, spacing, controllable_anatomy_size):
+     """
+     Validate input parameters for image generation.
+
+     Args:
+         body_region (list): List of body regions.
+         anatomy_list (list): List of anatomical structures.
+         label_dict_json (str): Path to the label dictionary JSON file.
+         output_size (tuple): Desired output size of the image.
+         spacing (tuple): Desired voxel spacing.
+         controllable_anatomy_size (list): List of tuples specifying controllable anatomy sizes.
+
+     Raises:
+         ValueError: If any input parameter is invalid.
+     """
+     # check output_size and spacing format
+     if output_size[0] != output_size[1]:
+         raise ValueError(f"The first two components of output_size must be equal, yet got {output_size}.")
+     if (output_size[0] not in [256, 384, 512]) or (output_size[2] not in [128, 256, 384, 512, 640, 768]):
+         raise ValueError(
+             (
+                 "output_size[0] must be chosen from [256, 384, 512], and output_size[2] "
+                 f"must be chosen from [128, 256, 384, 512, 640, 768], yet got {output_size}."
+             )
+         )
+
+     if spacing[0] != spacing[1]:
+         raise ValueError(f"The first two components of spacing must be equal, yet got {spacing}.")
+     if spacing[0] < 0.5 or spacing[0] > 3.0 or spacing[2] < 0.5 or spacing[2] > 5.0:
+         raise ValueError(
+             f"spacing[0] must be between 0.5 and 3.0 mm, and spacing[2] must be between 0.5 and 5.0 mm, yet got {spacing}."
+         )
+
+     if (
+         output_size[0] * spacing[0] < 256
+         or output_size[2] * spacing[2] < 128
+         or output_size[0] * spacing[0] > 640
+         or output_size[2] * spacing[2] > 2000
+     ):
+         fov = [output_size[axis] * spacing[axis] for axis in range(3)]
+         raise ValueError(
+             (
+                 f"'spacing' ({spacing} mm) and 'output_size' ({output_size}) together determine the output field of view (FOV). "
+                 f"The FOV will be {fov} mm. We recommend the FOV in the x and y axes to be at least 256 mm for head, at least "
+                 "384 mm for other body regions like abdomen, and less than 640 mm. "
+                 "For the z-axis, we require it to be at least 128 mm and less than 2000 mm."
+             )
+         )
+
+     # check controllable_anatomy_size format
+     if len(controllable_anatomy_size) > 10:
+         raise ValueError(
+             f"controllable_anatomy_size can have at most 10 entries, yet got {len(controllable_anatomy_size)}."
+         )
+     available_controllable_organ = ["liver", "gallbladder", "stomach", "pancreas", "colon"]
+     available_controllable_tumor = [
+         "hepatic tumor",
+         "bone lesion",
+         "lung tumor",
+         "colon cancer primaries",
+         "pancreatic tumor",
+     ]
+     available_controllable_anatomy = available_controllable_organ + available_controllable_tumor
+     controllable_tumor = []
+     controllable_organ = []
+     for controllable_anatomy_size_pair in controllable_anatomy_size:
+         if controllable_anatomy_size_pair[0] not in available_controllable_anatomy:
+             raise ValueError(
+                 (
+                     f"The controllable anatomy must be chosen from {available_controllable_anatomy}, "
+                     f"yet got {controllable_anatomy_size_pair[0]}."
+                 )
+             )
+         if controllable_anatomy_size_pair[0] in available_controllable_tumor:
+             controllable_tumor += [controllable_anatomy_size_pair[0]]
+         if controllable_anatomy_size_pair[0] in available_controllable_organ:
+             controllable_organ += [controllable_anatomy_size_pair[0]]
+         if controllable_anatomy_size_pair[1] == -1:
+             continue
+         if controllable_anatomy_size_pair[1] < 0 or controllable_anatomy_size_pair[1] > 1.0:
+             raise ValueError(
+                 (
+                     "The controllable size scale must be between 0 and 1.0, or equal to -1, "
+                     f"yet got {controllable_anatomy_size_pair[1]}."
+                 )
+             )
+     if len(controllable_tumor + controllable_organ) != len(list(set(controllable_tumor + controllable_organ))):
+         raise ValueError(f"Please do not repeat controllable_anatomy. Got {controllable_tumor + controllable_organ}.")
+     if len(controllable_tumor) > 1:
+         raise ValueError(f"Only one controllable tumor is supported, yet got {controllable_tumor}.")
+
+     if len(controllable_anatomy_size) > 0:
+         logging.info(
+             (
+                 "`controllable_anatomy_size` is not empty.\nWe will ignore `body_region` and `anatomy_list` "
+                 f"and synthesize based on `controllable_anatomy_size`: ({controllable_anatomy_size})."
+             )
+         )
+     else:
+         logging.info(
+             (f"`controllable_anatomy_size` is empty.\nWe will synthesize based on `anatomy_list`: ({anatomy_list}).")
+         )
+     # check body_region format
+     available_body_region = ["head", "chest", "thorax", "abdomen", "pelvis", "lower"]
+     for region in body_region:
+         if region not in available_body_region:
+             raise ValueError(
+                 f"The components in body_region must be chosen from {available_body_region}, yet got {region}."
+             )
+
+     # check anatomy_list format
+     with open(label_dict_json) as f:
+         label_dict = json.load(f)
+     for anatomy in anatomy_list:
+         if anatomy not in label_dict.keys():
+             raise ValueError(
+                 f"The components in anatomy_list must be chosen from {label_dict.keys()}, yet got {anatomy}."
+             )
+     logging.info(f"The generated results will have voxel size {spacing} mm and volume size {output_size}.")
+
+     return
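A valid call, for reference (label names must exist in the bundle's label dictionary; the JSON path is assumed to be the config shipped with the bundle):

```python
check_input(
    body_region=["abdomen"],
    anatomy_list=["liver", "spleen"],
    label_dict_json="./configs/label_dict.json",  # assumed config path
    output_size=(512, 512, 512),
    spacing=(1.0, 1.0, 1.0),
    controllable_anatomy_size=[],
)
```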
+
+
+ class LDMSampler:
+     """
+     A sampler class for generating synthetic medical images and masks using latent diffusion models.
+
+     Attributes:
+         Various attributes related to model configuration, input parameters, and generation settings.
+     """
+
+     def __init__(
+         self,
+         body_region,
+         anatomy_list,
+         modality,
+         all_mask_files_json,
+         all_anatomy_size_condtions_json,
+         all_mask_files_base_dir,
+         label_dict_json,
+         label_dict_remap_json,
+         autoencoder,
+         diffusion_unet,
+         controlnet,
+         noise_scheduler,
+         scale_factor,
+         mask_generation_autoencoder,
+         mask_generation_diffusion_unet,
+         mask_generation_scale_factor,
+         mask_generation_noise_scheduler,
+         device,
+         latent_shape,
+         mask_generation_latent_shape,
+         output_size,
+         output_dir,
+         controllable_anatomy_size,
+         image_output_ext=".nii.gz",
+         label_output_ext=".nii.gz",
+         real_img_median_statistics="./configs/image_median_statistics.json",
+         spacing=(1, 1, 1),
+         num_inference_steps=None,
+         mask_generation_num_inference_steps=None,
+         random_seed=None,
+         autoencoder_sliding_window_infer_size=(96, 96, 96),
+         autoencoder_sliding_window_infer_overlap=0.6667,
+     ) -> None:
+         """
+         Initialize the LDMSampler with various parameters and models.
+
+         Args:
+             Various parameters related to model configuration, input settings, and output specifications.
+         """
+         self.random_seed = random_seed
+         if random_seed is not None:
+             set_determinism(seed=random_seed)
+
+         with open(label_dict_json, "r") as f:
+             label_dict = json.load(f)
+         self.all_anatomy_size_condtions_json = all_anatomy_size_condtions_json
+
+         # initialize variables
+         self.body_region = body_region
+         self.anatomy_list = [label_dict[organ] for organ in anatomy_list]
+         self.modality_int = modality_mapping[modality]
+         self.all_mask_files_json = all_mask_files_json
+         self.data_root = all_mask_files_base_dir
+         self.label_dict_remap_json = label_dict_remap_json
+         self.autoencoder = autoencoder
+         self.diffusion_unet = diffusion_unet
+         self.controlnet = controlnet
+         self.noise_scheduler = noise_scheduler
+         self.scale_factor = scale_factor
+         self.mask_generation_autoencoder = mask_generation_autoencoder
+         self.mask_generation_diffusion_unet = mask_generation_diffusion_unet
+         self.mask_generation_scale_factor = mask_generation_scale_factor
+         self.mask_generation_noise_scheduler = mask_generation_noise_scheduler
+         self.device = device
+         self.latent_shape = latent_shape
+         self.mask_generation_latent_shape = mask_generation_latent_shape
+         self.output_size = output_size
+         self.output_dir = output_dir
+         self.noise_factor = 1.0
+         self.controllable_anatomy_size = controllable_anatomy_size
+         if len(self.controllable_anatomy_size):
+             logging.info("controllable_anatomy_size is given, mask generation is triggered!")
+             # overwrite the anatomy_list with the organs given in self.controllable_anatomy_size
+             self.anatomy_list = [label_dict[organ_and_size[0]] for organ_and_size in self.controllable_anatomy_size]
+         self.image_output_ext = image_output_ext
+         self.label_output_ext = label_output_ext
+         # Set the default number of inference steps to 1000
+         self.num_inference_steps = num_inference_steps if num_inference_steps is not None else 1000
+         self.mask_generation_num_inference_steps = (
+             mask_generation_num_inference_steps if mask_generation_num_inference_steps is not None else 1000
+         )
+
+         if any(size % 16 != 0 for size in autoencoder_sliding_window_infer_size):
+             raise ValueError(
+                 f"autoencoder_sliding_window_infer_size must be divisible by 16.\n Got {autoencoder_sliding_window_infer_size}"
+             )
+         if not (0 <= autoencoder_sliding_window_infer_overlap <= 1):
+             raise ValueError(
+                 (
+                     "Value of autoencoder_sliding_window_infer_overlap must be between 0 "
+                     f"and 1.\n Got {autoencoder_sliding_window_infer_overlap}"
+                 )
+             )
+         self.autoencoder_sliding_window_infer_size = autoencoder_sliding_window_infer_size
+         self.autoencoder_sliding_window_infer_overlap = autoencoder_sliding_window_infer_overlap
+
+         # quality check args
+         self.max_try_time = 3  # if the quality check fails, retry up to self.max_try_time times
+         with open(real_img_median_statistics, "r") as json_file:
+             self.median_statistics = json.load(json_file)
+         self.label_int_dict = {
+             "liver": [1],
+             "spleen": [3],
+             "pancreas": [4],
+             "kidney": [5, 14],
+             "lung": [28, 29, 30, 31, 32],
+             "brain": [22],
+             "hepatic tumor": [26],
+             "bone lesion": [128],
+             "lung tumor": [23],
+             "colon cancer primaries": [27],
+             "pancreatic tumor": [24],
+             "bone": list(range(33, 57)) + list(range(63, 98)) + [120, 122, 127],
+         }
+
+         # networks
+         self.autoencoder.eval()
+         self.diffusion_unet.eval()
+         self.controlnet.eval()
+         self.mask_generation_autoencoder.eval()
+         self.mask_generation_diffusion_unet.eval()
+
+         self.spacing = spacing
+
+         self.val_transforms = Compose(
+             [
+                 monai.transforms.LoadImaged(keys=["pseudo_label"]),
+                 monai.transforms.EnsureChannelFirstd(keys=["pseudo_label"]),
+                 monai.transforms.Orientationd(keys=["pseudo_label"], axcodes="RAS"),
+                 monai.transforms.EnsureTyped(keys=["pseudo_label"], dtype=torch.uint8),
+                 monai.transforms.Lambdad(keys="spacing", func=lambda x: torch.FloatTensor(x)),
+                 monai.transforms.Lambdad(keys="spacing", func=lambda x: x * 1e2),
+             ]
+         )
+         logging.info("LDM sampler initialized.")
+
+     def sample_multiple_images(self, num_img):
+         """
+         Generate multiple synthetic images and masks.
+
+         Args:
+             num_img (int): Number of images to generate.
+         """
+         output_filenames = []
+         if len(self.controllable_anatomy_size) > 0:
+             # we will use mask generation instead of finding candidate masks
+             # create a dummy selected_mask_files as placeholder
+             selected_mask_files = list(range(num_img))
+             # prepare organ size conditions
+             anatomy_size_condtion = self.prepare_anatomy_size_condtion(self.controllable_anatomy_size)
+         else:
+             need_resample = False
+             # find candidate masks and save them to candidate_mask_files
+             candidate_mask_files = find_masks(
+                 self.anatomy_list, self.spacing, self.output_size, True, self.all_mask_files_json, self.data_root
+             )
+             if len(candidate_mask_files) < num_img:
+                 # if we cannot find enough masks based on an exact match of anatomy list, spacing, and output size,
+                 # we will try to find the closest masks in terms of spacing and output size.
+                 logging.info("Resample mask file to get desired output size and spacing")
+                 candidate_mask_files = self.find_closest_masks(num_img)
+                 need_resample = True
+
+             selected_mask_files = self.select_mask(candidate_mask_files, num_img)
+             if len(selected_mask_files) < num_img:
+                 raise ValueError(
+                     (
+                         f"len(selected_mask_files) ({len(selected_mask_files)}) < num_img ({num_img}). "
+                         "This should not happen. Please revisit function select_mask(self, candidate_mask_files, num_img)."
+                     )
+                 )
+         num_generated_img = 0
+         for index_s in range(len(selected_mask_files)):
+             item = selected_mask_files[index_s]
+             if num_generated_img >= num_img:
+                 break
+             logging.info("---- Start preparing masks... ----")
+             start_time = time.time()
+             logging.info(f"Image will be generated based on {item}.")
+             if len(self.controllable_anatomy_size) > 0:
+                 # generate a synthetic mask
+                 (combine_label_or, spacing_tensor) = self.prepare_one_mask_and_meta_info(anatomy_size_condtion)
+             else:
+                 # read in the mask file
+                 mask_file = item["mask_file"]
+                 if_aug = item["if_aug"]
+                 (combine_label_or, spacing_tensor) = self.read_mask_information(mask_file)
+                 if need_resample:
+                     combine_label_or = self.ensure_output_size_and_spacing(combine_label_or)
+                 # mask augmentation
+                 if if_aug:
+                     combine_label_or = augmentation(combine_label_or, self.output_size, random_seed=self.random_seed)
+             end_time = time.time()
+             logging.info(f"---- Mask preparation time: {end_time - start_time} seconds ----")
+             torch.cuda.empty_cache()
+             # generate image/label pairs
+             modality_tensor = torch.ones_like(spacing_tensor[:, 0]).long() * self.modality_int
+             # start generation
+             synthetic_images, synthetic_labels = self.sample_one_pair(combine_label_or, modality_tensor, spacing_tensor)
+             # synthetic image quality check
+             pass_quality_check = self.quality_check(
+                 synthetic_images.cpu().detach().numpy(), combine_label_or.cpu().detach().numpy()
+             )
+             if pass_quality_check or (num_img - num_generated_img) >= (len(selected_mask_files) - index_s):
+                 if not pass_quality_check:
+                     logging.info(
+                         "Generated image/label pair did not pass the quality check, but will still be saved. "
+                         "Please consider changing spacing and output_size to facilitate a more realistic setting."
+                     )
+                 num_generated_img = num_generated_img + 1
+                 # save image/label pairs
+                 output_postfix = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+                 synthetic_labels.meta["filename_or_obj"] = "sample.nii.gz"
+                 synthetic_images = MetaTensor(synthetic_images, meta=synthetic_labels.meta)
+                 img_saver = SaveImage(
+                     output_dir=self.output_dir,
+                     output_postfix=output_postfix + "_image",
+                     output_ext=self.image_output_ext,
+                     separate_folder=False,
+                 )
+                 img_saver(synthetic_images[0])
+                 synthetic_images_filename = os.path.join(
+                     self.output_dir, "sample_" + output_postfix + "_image" + self.image_output_ext
+                 )
+                 # filter out the organs that are not in anatomy_list
+                 synthetic_labels = filter_mask_with_organs(synthetic_labels, self.anatomy_list)
+                 label_saver = SaveImage(
+                     output_dir=self.output_dir,
+                     output_postfix=output_postfix + "_label",
+                     output_ext=self.label_output_ext,
+                     separate_folder=False,
+                 )
+                 label_saver(synthetic_labels[0])
+                 synthetic_labels_filename = os.path.join(
+                     self.output_dir, "sample_" + output_postfix + "_label" + self.label_output_ext
+                 )
+                 output_filenames.append([synthetic_images_filename, synthetic_labels_filename])
+             else:
+                 logging.info("Generated image/label pair did not pass the quality check; re-generating another pair.")
+         return output_filenames
+
+     def select_mask(self, candidate_mask_files, num_img):
+         """
+         Select mask files for image generation.
+
+         Args:
+             candidate_mask_files (list): List of candidate mask files.
+             num_img (int): Number of images to generate.
+
+         Returns:
+             list: Selected mask files with augmentation flags.
+         """
+         selected_mask_files = []
+         random.shuffle(candidate_mask_files)
+
+         # oversample by max_try_time so that failed quality checks can fall back to other masks
+         for n in range(num_img * self.max_try_time):
+             mask_file = candidate_mask_files[n % len(candidate_mask_files)]
+             selected_mask_files.append({"mask_file": mask_file, "if_aug": True})
+         return selected_mask_files
+
+     def sample_one_pair(self, combine_label_or_aug, modality_tensor, spacing_tensor):
+         """
+         Generate a single pair of synthetic image and mask.
+
+         Args:
+             combine_label_or_aug (torch.Tensor): Combined label tensor, possibly augmented.
+             modality_tensor (torch.Tensor): Tensor specifying the image modality.
+             spacing_tensor (torch.Tensor): Tensor specifying the spacing.
+
+         Returns:
+             tuple: A tuple containing the synthetic image and its corresponding label.
+         """
+         # generate image/label pairs
+         synthetic_images, synthetic_labels = ldm_conditional_sample_one_image(
+             autoencoder=self.autoencoder,
+             diffusion_unet=self.diffusion_unet,
+             controlnet=self.controlnet,
+             noise_scheduler=self.noise_scheduler,
+             scale_factor=self.scale_factor,
+             device=self.device,
+             combine_label_or=combine_label_or_aug,
+             modality_tensor=modality_tensor,
+             spacing_tensor=spacing_tensor,
+             latent_shape=self.latent_shape,
+             output_size=self.output_size,
+             noise_factor=self.noise_factor,
+             num_inference_steps=self.num_inference_steps,
+             autoencoder_sliding_window_infer_size=self.autoencoder_sliding_window_infer_size,
+             autoencoder_sliding_window_infer_overlap=self.autoencoder_sliding_window_infer_overlap,
+         )
+         return synthetic_images, synthetic_labels
+
+     def prepare_anatomy_size_condtion(self, controllable_anatomy_size):
+         """
+         Prepare anatomy size conditions for mask generation.
+
+         Args:
+             controllable_anatomy_size (list): List of tuples specifying controllable anatomy sizes.
+
+         Returns:
+             list: Prepared anatomy size conditions.
+         """
+         anatomy_size_idx = {
+             "gallbladder": 0,
+             "liver": 1,
+             "stomach": 2,
+             "pancreas": 3,
+             "colon": 4,
+             "lung tumor": 5,
+             "pancreatic tumor": 6,
+             "hepatic tumor": 7,
+             "colon cancer primaries": 8,
+             "bone lesion": 9,
+         }
+         provide_anatomy_size = [None for _ in range(10)]
+         logging.info(f"controllable_anatomy_size: {controllable_anatomy_size}")
+         for element in controllable_anatomy_size:
+             anatomy_name, anatomy_size = element
+             provide_anatomy_size[anatomy_size_idx[anatomy_name]] = anatomy_size
+
+         with open(self.all_anatomy_size_condtions_json, "r") as f:
+             all_anatomy_size_condtions = json.load(f)
+
+         # loop through the database and find the closest combinations
+         candidate_list = []
+         for anatomy_size in all_anatomy_size_condtions:
+             size = anatomy_size["organ_size"]
+             diff = 0
+             for db_size, provide_size in zip(size, provide_anatomy_size):
+                 if provide_size is None:
+                     continue
+                 diff += abs(provide_size - db_size)
+             candidate_list.append((size, diff))
+         candidate_condition = sorted(candidate_list, key=lambda x: x[1])[0][0]
+
+         # overwrite the anatomy sizes provided by the user
+         for element in controllable_anatomy_size:
+             anatomy_name, anatomy_size = element
+             candidate_condition[anatomy_size_idx[anatomy_name]] = anatomy_size
+
+         return candidate_condition
+
+     def prepare_one_mask_and_meta_info(self, anatomy_size_condtion):
+         """
+         Prepare a single mask and its associated meta information.
+
+         Args:
+             anatomy_size_condtion (list): Anatomy size conditions.
+
+         Returns:
+             tuple: A tuple containing the prepared mask and associated tensors.
+         """
+         combine_label_or = self.sample_one_mask(anatomy_size=anatomy_size_condtion)
+         # TODO: the current mask generation model can only generate 256^3 volumes with 1.5 mm spacing.
+         affine = torch.zeros((4, 4))
+         affine[0, 0] = 1.5
+         affine[1, 1] = 1.5
+         affine[2, 2] = 1.5
+         affine[3, 3] = 1.0  # dummy
+         combine_label_or = MetaTensor(combine_label_or, affine=affine)
+         combine_label_or = self.ensure_output_size_and_spacing(combine_label_or)
+
+         spacing_tensor = torch.FloatTensor(self.spacing).unsqueeze(0).half().to(self.device) * 1e2
+
+         return combine_label_or, spacing_tensor
+
+     def sample_one_mask(self, anatomy_size):
+         """
+         Generate a single synthetic mask.
+
+         Args:
+             anatomy_size (list): Anatomy size specifications.
+
+         Returns:
+             torch.Tensor: The generated synthetic mask.
+         """
+         # generate one synthetic mask
+         synthetic_mask = ldm_conditional_sample_one_mask(
+             self.mask_generation_autoencoder,
+             self.mask_generation_diffusion_unet,
+             self.mask_generation_noise_scheduler,
+             self.mask_generation_scale_factor,
+             anatomy_size,
+             self.device,
+             self.mask_generation_latent_shape,
+             label_dict_remap_json=self.label_dict_remap_json,
+             num_inference_steps=self.mask_generation_num_inference_steps,
+             autoencoder_sliding_window_infer_size=self.autoencoder_sliding_window_infer_size,
+             autoencoder_sliding_window_infer_overlap=self.autoencoder_sliding_window_infer_overlap,
+         )
+         return synthetic_mask
+
+     def ensure_output_size_and_spacing(self, labels, check_contains_target_labels=True):
+         """
+         Ensure the output mask has the correct size and spacing.
+
+         Args:
+             labels (torch.Tensor): Input label tensor.
+             check_contains_target_labels (bool): Whether to check if the resampled mask contains target labels.
+
+         Returns:
+             torch.Tensor: Resampled label tensor.
+
+         Raises:
+             ValueError: If the resampled mask does not contain the required class labels.
+         """
+         current_spacing = [labels.affine[0, 0], labels.affine[1, 1], labels.affine[2, 2]]
+         current_shape = list(labels.squeeze().shape)
+
+         need_resample = False
+         # check spacing
+         for i, j in zip(current_spacing, self.spacing):
+             if i != j:
+                 need_resample = True
+         # check output size
+         for i, j in zip(current_shape, self.output_size):
+             if i != j:
+                 need_resample = True
+         # resample to the target size and spacing
+         if need_resample:
+             logging.info("Resampling mask to the target shape and spacing")
+             logging.info(f"Resize spacing: {current_spacing} -> {self.spacing}")
+             logging.info(f"Output size: {current_shape} -> {self.output_size}")
+             spacing = monai.transforms.Spacing(pixdim=tuple(self.spacing), mode="nearest")
+             pad_crop = monai.transforms.ResizeWithPadOrCrop(spatial_size=tuple(self.output_size))
+             labels = pad_crop(spacing(labels.squeeze(0))).unsqueeze(0).to(labels.dtype)
+
+         contained_labels = torch.unique(labels)
+         if check_contains_target_labels:
+             # check whether the resampled mask still contains the target labels
+             for anatomy_label in self.anatomy_list:
+                 if anatomy_label not in contained_labels:
+                     raise ValueError(
+                         (
+                             f"Resampled mask does not contain required class labels {anatomy_label}. "
+                             "Please consider increasing the output spacing or specifying a larger output size."
+                         )
+                     )
+         return labels
+
+     def read_mask_information(self, mask_file):
+         """
+         Read mask information from a file.
+
+         Args:
+             mask_file (str): Path to the mask file.
+
+         Returns:
+             tuple: A tuple containing the mask tensor and associated information.
+         """
+         val_data = self.val_transforms(mask_file)
+
+         for key in ["pseudo_label", "spacing"]:
+             val_data[key] = val_data[key].unsqueeze(0).to(self.device)
+
+         return (val_data["pseudo_label"], val_data["spacing"])
+
+     def find_closest_masks(self, num_img):
+         """
+         Find the closest matching masks from the database.
+
+         Args:
+             num_img (int): Number of images to generate.
+
+         Returns:
+             list: List of closest matching mask candidates.
+
+         Raises:
+             ValueError: If suitable candidates cannot be found.
+         """
+         # first query the database based on the anatomy list
+         candidates = find_masks(
+             self.anatomy_list, self.spacing, self.output_size, False, self.all_mask_files_json, self.data_root
+         )
+
+         if len(candidates) < num_img:
+             raise ValueError(f"The number of candidate masks is less than {num_img}.")
+
+         # loop through the database and find the closest combinations
+         new_candidates = []
+         for c in candidates:
+             diff = 0
+             include_c = True
+             for axis in range(3):
+                 if abs(c["dim"][axis]) < self.output_size[axis] - 64:
+                     # we cannot upsample the mask too much
+                     include_c = False
+                     break
+                 # difference in FOV, the major metric
+                 diff += abs(
+                     (abs(c["dim"][axis] * c["spacing"][axis]) - self.output_size[axis] * self.spacing[axis]) / 10
+                 )
+                 # difference in dim
+                 diff += abs((abs(c["dim"][axis]) - self.output_size[axis]) / 100)
+                 # difference in spacing
+                 diff += abs(abs(c["spacing"][axis]) - self.spacing[axis])
+             if include_c:
+                 new_candidates.append((c, diff))
+
+         # choose the top 2*num_img candidates (at least 5)
+         new_candidates = sorted(new_candidates, key=lambda x: x[1])[: max(2 * num_img, 5)]
+         final_candidates = []
+
+         # check the top candidates and update spacing after resampling
+         image_loader = monai.transforms.LoadImage(image_only=True, ensure_channel_first=True)
+         for c, _ in new_candidates:
+             label = image_loader(c["pseudo_label"])
+             try:
+                 label = self.ensure_output_size_and_spacing(label.unsqueeze(0))
+             except ValueError as e:
+                 if "Resampled mask does not contain required class labels" in str(e):
+                     continue
+                 else:
+                     raise e
+             # update meta information after resampling
+             c["spacing"] = self.spacing
+             c["dim"] = self.output_size
+
+             final_candidates.append(c)
+         if len(final_candidates) == 0:
+             raise ValueError("Cannot find body region with given anatomy list.")
+         return final_candidates
+
+     def quality_check(self, image_data, label_data):
+         """
+         Perform a quality check on the generated image.
+
+         Args:
+             image_data (np.ndarray): The generated image.
+             label_data (np.ndarray): The corresponding whole-body mask.
+
+         Returns:
+             bool: True if the image passes the quality check, False otherwise.
+         """
+         outlier_results = is_outlier(self.median_statistics, image_data, label_data, self.label_int_dict)
+         for label, result in outlier_results.items():
+             if result.get("is_outlier", False):
+                 logging.info(
+                     (
+                         f"Generated image quality check for label '{label}' failed: median value {result['median_value']} "
+                         f"is outside the acceptable range ({result['low_thresh']} - {result['high_thresh']})."
+                     )
+                 )
+                 return False
+         return True
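For reference, how the check is consumed after a generation step (a sketch; `sampler` is an initialized `LDMSampler`, and `synthetic_images`/`combine_label_or` come from `sample_one_pair` as in `sample_multiple_images` above):

```python
ok = sampler.quality_check(
    synthetic_images.cpu().detach().numpy(),
    combine_label_or.cpu().detach().numpy(),
)
if not ok:
    print("median HU outside the precomputed per-organ range; consider re-generating")
```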
scripts/trainer.py ADDED
@@ -0,0 +1,246 @@
1
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence
+
+ import torch
+ import torch.nn.functional as F
+ from monai.engines.trainer import Trainer
+ from monai.engines.utils import IterationEvents, PrepareBatchExtraInput, default_metric_cmp_fn
+ from monai.inferers import Inferer
+ from monai.networks.schedulers import Scheduler
+ from monai.transforms import Transform
+ from monai.utils import IgniteInfo, RankFilter, min_version, optional_import
+ from monai.utils.enums import CommonKeys as Keys
+ from torch.optim.optimizer import Optimizer
+ from torch.utils.data import DataLoader
+
+ from .utils import binarize_labels
+
+ if TYPE_CHECKING:
+     from ignite.engine import Engine, EventEnum
+     from ignite.metrics import Metric
+ else:
+     Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine")
+     Metric, _ = optional_import("ignite.metrics", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Metric")
+     EventEnum, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "EventEnum")
+
+ __all__ = ["MAISIControlNetTrainer"]
+
+ # Module-level variable for prepare_batch default value
+ DEFAULT_PREPARE_BATCH = PrepareBatchExtraInput(extra_keys=("dim", "spacing", "top_region_index", "bottom_region_index"))
+
+
+ class MAISIControlNetTrainer(Trainer):
+     """
+     Supervised training method with image and label, inherits from ``Trainer`` and ``Workflow``.
+
+     Args:
+         device: an object representing the device on which to run.
+         max_epochs: the total epoch number for trainer to run.
+         train_data_loader: Ignite engine uses data_loader to run, must be Iterable or torch.DataLoader.
+         controlnet: controlnet to train in the trainer, should be a regular PyTorch `torch.nn.Module`.
+         diffusion_unet: diffusion_unet used in the trainer, should be a regular PyTorch `torch.nn.Module`.
+         optimizer: the optimizer associated with the controlnet, should be a regular PyTorch optimizer
+             from `torch.optim` or its subclass.
+         epoch_length: number of iterations for one epoch, default to `len(train_data_loader)`.
+         non_blocking: if True and this copy is between CPU and GPU, the copy may occur asynchronously
+             with respect to the host. For other cases, this argument has no effect.
+         prepare_batch: function to parse expected data (usually `image`, `label` and other model args)
+             from `engine.state.batch` for every iteration, for more details please refer to:
+             https://pytorch.org/ignite/generated/ignite.engine.create_supervised_trainer.html.
+         iteration_update: the callable function for every iteration, expected to accept `engine`
+             and `engine.state.batch` as inputs; the returned data will be stored in `engine.state.output`.
+             If not provided, use `self._iteration()` instead. For more details please refer to:
+             https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html.
+         inferer: inference method that executes the model forward pass on input data, like: SlidingWindow, etc.
+         postprocessing: execute additional transformation for the model output data.
+             Typically, several Tensor based transforms composed by `Compose`.
+         key_train_metric: compute metric when every iteration completed, and save average value to
+             engine.state.metrics when epoch completed. key_train_metric is the main metric to compare
+             and save the checkpoint into files.
+         additional_metrics: more Ignite metrics that also attach to Ignite Engine.
+         metric_cmp_fn: function to compare current key metric with previous best key metric value,
+             it must accept 2 args (current_metric, previous_best) and return a bool result: if `True`, will update
+             `best_metric` and `best_metric_epoch` with current metric and epoch, default to `greater than`.
+         train_handlers: every handler is a set of Ignite Event-Handlers, must have `attach` function, like:
+             CheckpointHandler, StatsHandler, etc.
+         amp: whether to enable auto-mixed-precision training, default is False.
+         event_names: additional custom ignite events that will register to the engine.
+             New events can be a list of str or `ignite.engine.events.EventEnum`.
+         event_to_attr: a dictionary to map an event to a state attribute, then add to `engine.state`.
+             For more details, check: https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html
+             #ignite.engine.engine.Engine.register_events.
+         decollate: whether to decollate the batch-first data to a list of data after model computation,
+             recommend `decollate=True` when `postprocessing` uses components from `monai.transforms`.
+             Default to `True`.
+         optim_set_to_none: when calling `optimizer.zero_grad()`, instead of setting to zero, set the grads to None.
+             More details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
+         to_kwargs: dict of other args for the `prepare_batch` API when converting the input data, except for
+             `device`, `non_blocking`.
+         amp_kwargs: dict of the args for the `torch.cuda.amp.autocast()` API, for more details:
+             https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+     """
+
+     def __init__(
+         self,
+         device: torch.device,
+         max_epochs: int,
+         train_data_loader: Iterable | DataLoader,
+         controlnet: torch.nn.Module,
+         diffusion_unet: torch.nn.Module,
+         optimizer: Optimizer,
+         loss_function: Callable,
+         inferer: Inferer,
+         noise_scheduler: Scheduler,
+         epoch_length: int | None = None,
+         non_blocking: bool = False,
+         prepare_batch: Callable = DEFAULT_PREPARE_BATCH,
+         iteration_update: Callable[[Engine, Any], Any] | None = None,
+         postprocessing: Transform | None = None,
+         key_train_metric: dict[str, Metric] | None = None,
+         additional_metrics: dict[str, Metric] | None = None,
+         metric_cmp_fn: Callable = default_metric_cmp_fn,
+         train_handlers: Sequence | None = None,
+         amp: bool = False,
+         event_names: list[str | EventEnum] | None = None,
+         event_to_attr: dict | None = None,
+         decollate: bool = True,
+         optim_set_to_none: bool = False,
+         to_kwargs: dict | None = None,
+         amp_kwargs: dict | None = None,
+         hyper_kwargs: dict | None = None,
+     ) -> None:
+         super().__init__(
+             device=device,
+             max_epochs=max_epochs,
+             data_loader=train_data_loader,
+             epoch_length=epoch_length,
+             non_blocking=non_blocking,
+             prepare_batch=prepare_batch,
+             iteration_update=iteration_update,
+             postprocessing=postprocessing,
+             key_metric=key_train_metric,
+             additional_metrics=additional_metrics,
+             metric_cmp_fn=metric_cmp_fn,
+             handlers=train_handlers,
+             amp=amp,
+             event_names=event_names,
+             event_to_attr=event_to_attr,
+             decollate=decollate,
+             to_kwargs=to_kwargs,
+             amp_kwargs=amp_kwargs,
+         )
+
+         self.controlnet = controlnet
+         self.diffusion_unet = diffusion_unet
+         self.optimizer = optimizer
+         self.loss_function = loss_function
+         self.inferer = inferer
+         self.optim_set_to_none = optim_set_to_none
+         self.hyper_kwargs = hyper_kwargs
+         self.noise_scheduler = noise_scheduler
+         self.logger.addFilter(RankFilter())
+         for p in self.diffusion_unet.parameters():
+             p.requires_grad = False
+         print("freeze the parameters of the diffusion unet model.")
+
+     def _iteration(self, engine, batchdata: dict[str, torch.Tensor]):
+         """
+         Callback function for the Supervised Training processing logic of 1 iteration in Ignite Engine.
+         Return below items in a dictionary:
+             - IMAGE: image Tensor data for model input, already moved to device.
+         Args:
+             engine: `MAISIControlNetTrainer` to execute operation for an iteration.
+             batchdata: input data for this iteration, usually can be dictionary or tuple of Tensor data.
+         Raises:
+             ValueError: When ``batchdata`` is None.
+         """
+
+         if batchdata is None:
+             raise ValueError("Must provide batch data for current iteration.")
+
+         inputs, labels, (dim, spacing, top_region_index, bottom_region_index), _ = engine.prepare_batch(
+             batchdata, engine.state.device, engine.non_blocking, **engine.to_kwargs
+         )
+         engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: labels}
+         weighted_loss_label = engine.hyper_kwargs["weighted_loss_label"]
+         weighted_loss = engine.hyper_kwargs["weighted_loss"]
+         scale_factor = engine.hyper_kwargs["scale_factor"]
+         # scale image embedding by the provided scale_factor
+         inputs = inputs * scale_factor
+
+         def _compute_pred_loss():
+             # generate random noise
+             noise_shape = list(inputs.shape)
+             noise = torch.randn(noise_shape, dtype=inputs.dtype).to(inputs.device)
+
+             # use binary encoding to encode segmentation mask
+             controlnet_cond = binarize_labels(labels.as_tensor().to(torch.uint8)).float()
+
+             # create timesteps
+             timesteps = torch.randint(
+                 0, engine.noise_scheduler.num_train_timesteps, (inputs.shape[0],), device=inputs.device
+             ).long()
+
+             # create noisy latent
+             noisy_latent = engine.noise_scheduler.add_noise(original_samples=inputs, noise=noise, timesteps=timesteps)
+
+             # get controlnet output
+             down_block_res_samples, mid_block_res_sample = engine.controlnet(
+                 x=noisy_latent, timesteps=timesteps, controlnet_cond=controlnet_cond
+             )
+             noise_pred = engine.diffusion_unet(
+                 x=noisy_latent,
+                 timesteps=timesteps,
+                 top_region_index_tensor=top_region_index,
+                 bottom_region_index_tensor=bottom_region_index,
+                 spacing_tensor=spacing,
+                 down_block_additional_residuals=down_block_res_samples,
+                 mid_block_additional_residual=mid_block_res_sample,
+             )
+
+             engine.state.output[Keys.PRED] = noise_pred
+             engine.fire_event(IterationEvents.FORWARD_COMPLETED)
+
+             if weighted_loss > 1.0:
+                 weights = torch.ones_like(inputs).to(inputs.device)
+                 roi = torch.zeros([noise_shape[0]] + [1] + noise_shape[2:]).to(inputs.device)
+                 interpolate_label = F.interpolate(labels, size=inputs.shape[2:], mode="nearest")
+                 # assign larger weights for ROI (tumor)
+                 for label in weighted_loss_label:
+                     roi[interpolate_label == label] = 1
+                 weights[roi.repeat(1, inputs.shape[1], 1, 1, 1) == 1] = weighted_loss
+                 loss = (F.l1_loss(noise_pred.float(), noise.float(), reduction="none") * weights).mean()
+             else:
+                 loss = F.l1_loss(noise_pred.float(), noise.float())
+
+             engine.state.output[Keys.LOSS] = loss
+             engine.fire_event(IterationEvents.LOSS_COMPLETED)
+
+         engine.controlnet.train()
+         engine.optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
+
+         if engine.amp and engine.scaler is not None:
+             with torch.amp.autocast("cuda", **engine.amp_kwargs):
+                 _compute_pred_loss()
+             engine.scaler.scale(engine.state.output[Keys.LOSS]).backward()
+             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
+             engine.scaler.step(engine.optimizer)
+             engine.scaler.update()
+         else:
+             _compute_pred_loss()
+             engine.state.output[Keys.LOSS].backward()
+             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
+             engine.optimizer.step()
+         engine.fire_event(IterationEvents.MODEL_COMPLETED)
+         return engine.state.output
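The `_iteration` step above reads three keys from `hyper_kwargs`; a sketch of a plausible configuration follows (the values are illustrative, not the bundle defaults):

# hyper_kwargs consumed by MAISIControlNetTrainer._iteration (illustrative values):
hyper_kwargs = {
    "weighted_loss": 100.0,        # > 1.0 switches on the ROI-weighted L1 loss branch
    "weighted_loss_label": [129],  # label values up-weighted in the loss (e.g. a tumor label)
    "scale_factor": 1.0,           # scaling applied to the latent image embedding
}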
scripts/utils.py ADDED
@@ -0,0 +1,696 @@
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import copy
+ import json
+ import math
+ import os
+ import zipfile
+ from argparse import Namespace
+ from datetime import timedelta
+ from typing import Any, Sequence
+
+ import numpy as np
+ import skimage.measure
+ import torch
+ import torch.distributed as dist
+ from monai.bundle import ConfigParser
+ from monai.config import DtypeLike, NdarrayOrTensor
+ from monai.data import CacheDataset, DataLoader, partition_dataset
+ from monai.transforms import Compose, EnsureTyped, Lambdad, LoadImaged, Orientationd
+ from monai.transforms.utils_morphological_ops import dilate, erode
+ from monai.utils import TransformBackends, convert_data_type, convert_to_dst_type, get_equivalent_dtype
+ from scipy import stats
+ from torch import Tensor
+
+
+ def unzip_dataset(dataset_dir):
+     if dist.is_available() and dist.is_initialized():
+         rank = dist.get_rank()
+     else:
+         rank = 0
+
+     if rank == 0:
+         if not os.path.exists(dataset_dir):
+             zip_file_path = dataset_dir + ".zip"
+             if not os.path.isfile(zip_file_path):
+                 raise ValueError(f"Please download {zip_file_path}.")
+             with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
+                 zip_ref.extractall(path=os.path.dirname(dataset_dir))
+             print(f"Unzipped {zip_file_path} to {dataset_dir}.")
+
+     if dist.is_available() and dist.is_initialized():
+         dist.barrier()  # synchronize all processes
+
+     return
+
+
+ def add_data_dir2path(list_files: list, data_dir: str, fold: int | None = None) -> tuple[list, list]:
+     """
+     Read a list of data dictionaries and prepend `data_dir` to the image/label paths.
+
+     Args:
+         list_files (list): input data to load and transform to generate dataset for model.
+         data_dir (str): directory of files.
+         fold (int, optional): fold index for cross validation. Defaults to None.
+
+     Returns:
+         tuple[list, list]: A tuple of two lists (training, validation).
+     """
+     new_list_files = copy.deepcopy(list_files)
+     if fold is not None:
+         new_list_files_train = []
+         new_list_files_val = []
+     for d in new_list_files:
+         d["image"] = os.path.join(data_dir, d["image"])
+
+         if "label" in d:
+             d["label"] = os.path.join(data_dir, d["label"])
+
+         if fold is not None:
+             if d["fold"] == fold:
+                 new_list_files_val.append(copy.deepcopy(d))
+             else:
+                 new_list_files_train.append(copy.deepcopy(d))
+
+     if fold is not None:
+         return new_list_files_train, new_list_files_val
+     else:
+         return new_list_files, []
+
+
+ def maisi_datafold_read(json_list, data_base_dir, fold=None):
+     with open(json_list, "r") as f:
+         filenames_train = json.load(f)["training"]
+     # training data
+     train_files, val_files = add_data_dir2path(filenames_train, data_base_dir, fold=fold)
+     print(f"dataset: {data_base_dir}, num_training_files: {len(train_files)}, num_val_files: {len(val_files)}")
+     return train_files, val_files
+
+
+ def remap_labels(mask, label_dict_remap_json):
+     """
+     Remap labels in the mask according to the provided label dictionary.
+
+     This function reads a JSON file containing label mapping information and applies
+     the mapping to the input mask.
+
+     Args:
+         mask (Tensor): The input mask tensor to be remapped.
+         label_dict_remap_json (str): Path to the JSON file containing the label mapping dictionary.
+
+     Returns:
+         Tensor: The remapped mask tensor.
+     """
+     with open(label_dict_remap_json, "r") as f:
+         mapping_dict = json.load(f)
+     mapper = MapLabelValue(
+         orig_labels=[pair[0] for pair in mapping_dict.values()],
+         target_labels=[pair[1] for pair in mapping_dict.values()],
+         dtype=torch.uint8,
+     )
+     return mapper(mask[0, ...])[None, ...].to(mask.device)
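The remap JSON is expected to map each class name to an `[original_label, target_label]` pair, matching how `mapping_dict.values()` is indexed above. A self-contained sketch with made-up class names and labels:

# Hypothetical mapping file content: {"liver": [6, 1], "spleen": [5, 3]}
import json
import tempfile

import torch

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump({"liver": [6, 1], "spleen": [5, 3]}, f)

mask = torch.tensor([[[5, 6], [6, 0]]], dtype=torch.uint8)  # channel-first toy mask
print(remap_labels(mask, f.name))  # 5 -> 3, 6 -> 1, other values unchanged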
+
+
+ def get_index_arr(img):
+     """
+     Generate an index array for the given image.
+
+     This function creates a 3D array of indices corresponding to the dimensions of the input image.
+
+     Args:
+         img (ndarray): The input image array.
+
+     Returns:
+         ndarray: A 3D array containing the indices for each dimension of the input image.
+     """
+     return np.moveaxis(
+         np.moveaxis(
+             np.stack(np.meshgrid(np.arange(img.shape[0]), np.arange(img.shape[1]), np.arange(img.shape[2]))), 0, 3
+         ),
+         0,
+         1,
+     )
+
+
+ def supress_non_largest_components(img, target_label, default_val=0):
+     """
+     Suppress all components except the largest one(s) for specified target labels.
+
+     This function identifies the largest component(s) for each target label and
+     suppresses all other smaller components.
+
+     Args:
+         img (ndarray): The input image array.
+         target_label (list): List of label values to process.
+         default_val (int, optional): Value to assign to suppressed voxels. Defaults to 0.
+
+     Returns:
+         tuple: A tuple containing:
+             - ndarray: Modified image with non-largest components suppressed.
+             - int: Number of voxels that were changed.
+     """
+     index_arr = get_index_arr(img)
+     img_mod = copy.deepcopy(img)
+     new_background = np.zeros(img.shape, dtype=np.bool_)
+     for label in target_label:
+         label_cc = skimage.measure.label(img == label, connectivity=3)
+         uv, uc = np.unique(label_cc, return_counts=True)
+         dominant_vals = uv[np.argsort(uc)[::-1][:2]]
+         if len(dominant_vals) >= 2:  # background plus at least one component; otherwise the label is absent
+             new_background = np.logical_or(
+                 new_background,
+                 np.logical_not(np.logical_or(label_cc == dominant_vals[0], label_cc == dominant_vals[1])),
+             )
+
+     for voxel in index_arr[new_background]:
+         img_mod[tuple(voxel)] = default_val
+     diff = np.sum((img - img_mod) > 0)
+
+     return img_mod, diff
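A toy sanity check of the suppression behavior (a sketch, assuming `scripts.utils` is importable): two disconnected blobs of the same label, where only the larger one survives.

import numpy as np

img = np.zeros((1, 8, 8), dtype=np.uint8)
img[0, 1:5, 1:5] = 1  # large component, 16 voxels
img[0, 6, 6] = 1      # small component, 1 voxel
img_mod, n_changed = supress_non_largest_components(img, target_label=[1])
print(n_changed)  # 1: only the single-voxel component is reset to default_val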
+
+
+ def erode_one_img(mask_t: Tensor, filter_size: int | Sequence[int] = 3, pad_value: float = 1.0) -> Tensor:
+     """
+     Erode a 2D/3D binary mask stored as a torch tensor.
+
+     Args:
+         mask_t: input 2D/3D binary mask, [M,N] or [M,N,P] torch tensor.
+         filter_size: erosion filter size; must be an odd number, defaults to 3.
+         pad_value: the fill value for padding. The input is padded before filtering
+             so the output keeps the same size as the input; the default usually
+             does not need to be changed.
+
+     Return:
+         Tensor: eroded mask, same shape as input.
+     """
+     return erode(mask_t.float().unsqueeze(0).unsqueeze(0), filter_size, pad_value=pad_value).squeeze(0).squeeze(0)
+
+
+ def dilate_one_img(mask_t: Tensor, filter_size: int | Sequence[int] = 3, pad_value: float = 0.0) -> Tensor:
+     """
+     Dilate a 2D/3D binary mask stored as a torch tensor.
+
+     Args:
+         mask_t: input 2D/3D binary mask, [M,N] or [M,N,P] torch tensor.
+         filter_size: dilation filter size; must be an odd number, defaults to 3.
+         pad_value: the fill value for padding. The input is padded before filtering
+             so the output keeps the same size as the input; the default usually
+             does not need to be changed.
+
+     Return:
+         Tensor: dilated mask, same shape as input.
+     """
+     return dilate(mask_t.float().unsqueeze(0).unsqueeze(0), filter_size, pad_value=pad_value).squeeze(0).squeeze(0)
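Chaining the two gives a morphological closing (dilate, then erode), which is how `organ_fill_by_closing` below fills holes. A minimal check:

import torch

mask = torch.ones(7, 7)
mask[3, 3] = 0  # punch a one-voxel hole
closed = erode_one_img(dilate_one_img(mask, filter_size=3), filter_size=3)
print(int(closed[3, 3]))  # 1: the hole is filled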
+
+
+ def binarize_labels(x: Tensor, bits: int = 8) -> Tensor:
+     """
+     Convert an integer label tensor to its binary (bitwise) representation.
+
+     This function takes an input tensor and converts it to a binary representation
+     using the specified number of bits.
+
+     Args:
+         x (Tensor): Input tensor with shape (B, 1, H, W, D).
+         bits (int, optional): Number of bits to use for the binary representation. Defaults to 8.
+
+     Returns:
+         Tensor: Binary representation of the input tensor with shape (B, bits, H, W, D).
+     """
+     mask = 2 ** torch.arange(bits).to(x.device, x.dtype)
+     return x.unsqueeze(-1).bitwise_and(mask).ne(0).byte().squeeze(1).permute(0, 4, 1, 2, 3)
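Each voxel's integer label is split into `bits` binary channels, least-significant bit first, so label 5 (0b101) activates channels 0 and 2:

import torch

x = torch.full((1, 1, 2, 2, 2), 5, dtype=torch.uint8)
b = binarize_labels(x, bits=8)
print(b.shape)           # torch.Size([1, 8, 2, 2, 2])
print(b[0, :, 0, 0, 0])  # tensor([1, 0, 1, 0, 0, 0, 0, 0], dtype=torch.uint8)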
+
+
+ def setup_ddp(rank: int, world_size: int) -> torch.device:
+     """
+     Initialize the distributed process group.
+
+     Args:
+         rank (int): rank of the current process.
+         world_size (int): number of processes participating in the job.
+
+     Returns:
+         torch.device: device of the current process.
+     """
+     dist.init_process_group(
+         backend="nccl", init_method="env://", timeout=timedelta(seconds=36000), rank=rank, world_size=world_size
+     )
+     dist.barrier()
+     device = torch.device(f"cuda:{rank}")
+     return device
+
+
+ def define_instance(args: Namespace, instance_def_key: str) -> Any:
+     """
+     Define and instantiate an object based on the provided arguments and instance definition key.
+
+     This function uses a ConfigParser to parse the arguments and instantiate an object
+     defined by the instance_def_key.
+
+     Args:
+         args: An object containing the arguments to be parsed.
+         instance_def_key (str): The key used to retrieve the instance definition from the parsed content.
+
+     Returns:
+         The instantiated object as defined by the instance_def_key in the parsed configuration.
+     """
+     parser = ConfigParser(vars(args))
+     parser.parse(True)
+     return parser.get_parsed_content(instance_def_key, instantiate=True)
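`define_instance` resolves a bundle-style config entry via its `_target_` reference; a self-contained sketch (the `noise_scheduler` key and values here are illustrative, not the bundle's actual config):

from argparse import Namespace

args = Namespace(
    noise_scheduler={"_target_": "monai.networks.schedulers.DDPMScheduler", "num_train_timesteps": 1000}
)
scheduler = define_instance(args, "noise_scheduler")
print(type(scheduler).__name__)  # DDPMScheduler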
+
+
+ def prepare_maisi_controlnet_json_dataloader(
+     json_data_list: list | str,
+     data_base_dir: list | str,
+     batch_size: int = 1,
+     fold: int = 0,
+     cache_rate: float = 0.0,
+     rank: int = 0,
+     world_size: int = 1,
+ ) -> tuple[DataLoader, DataLoader]:
+     """
+     Prepare dataloaders for training and validation.
+
+     Args:
+         json_data_list (list | str): the name(s) of JSON files listing the data.
+         data_base_dir (list | str): directory of files.
+         batch_size (int, optional): how many samples per batch to load. Defaults to 1.
+         fold (int, optional): fold index for cross validation. Defaults to 0.
+         cache_rate (float, optional): percentage of cached data in total. Defaults to 0.0.
+         rank (int, optional): rank of the current process. Defaults to 0.
+         world_size (int, optional): number of processes participating in the job. Defaults to 1.
+
+     Returns:
+         tuple[DataLoader, DataLoader]: A tuple of two dataloaders (training, validation).
+     """
+     use_ddp = world_size > 1
+     if isinstance(json_data_list, list):
+         assert isinstance(data_base_dir, list)
+         list_train = []
+         list_valid = []
+         for data_list, data_root in zip(json_data_list, data_base_dir):
+             with open(data_list, "r") as f:
+                 json_data = json.load(f)["training"]
+             train, val = add_data_dir2path(json_data, data_root, fold)
+             list_train += train
+             list_valid += val
+     else:
+         with open(json_data_list, "r") as f:
+             json_data = json.load(f)["training"]
+         list_train, list_valid = add_data_dir2path(json_data, data_base_dir, fold)
+
+     common_transform = [
+         LoadImaged(keys=["image", "label"], image_only=True, ensure_channel_first=True),
+         Orientationd(keys=["label"], axcodes="RAS"),
+         EnsureTyped(keys=["label"], dtype=torch.uint8, track_meta=True),
+         Lambdad(keys="top_region_index", func=lambda x: torch.FloatTensor(x)),
+         Lambdad(keys="bottom_region_index", func=lambda x: torch.FloatTensor(x)),
+         Lambdad(keys="spacing", func=lambda x: torch.FloatTensor(x)),
+         Lambdad(keys=["top_region_index", "bottom_region_index", "spacing"], func=lambda x: x * 1e2),
+     ]
+     train_transforms, val_transforms = Compose(common_transform), Compose(common_transform)
+
+     train_loader = None
+
+     if use_ddp:
+         list_train = partition_dataset(data=list_train, shuffle=True, num_partitions=world_size, even_divisible=True)[
+             rank
+         ]
+     train_ds = CacheDataset(data=list_train, transform=train_transforms, cache_rate=cache_rate, num_workers=8)
+     train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)
+     if use_ddp:
+         list_valid = partition_dataset(data=list_valid, shuffle=True, num_partitions=world_size, even_divisible=False)[
+             rank
+         ]
+     val_ds = CacheDataset(data=list_valid, transform=val_transforms, cache_rate=cache_rate, num_workers=8)
+     val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=False)
+     return train_loader, val_loader
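For reference, the datalist layout these loaders expect: a `training` list whose entries carry the image/label paths plus the conditioning fields consumed by the transforms and `prepare_batch` extra keys above. Paths and values below are hypothetical:

datalist = {
    "training": [
        {
            "image": "embeddings/case_0001.nii.gz",  # latent embedding of the CT volume
            "label": "masks/case_0001.nii.gz",       # whole-body segmentation mask
            "fold": 0,
            "spacing": [1.0, 1.0, 1.0],
            "top_region_index": [0, 1, 0, 0],
            "bottom_region_index": [0, 0, 1, 0],
            "dim": [512, 512, 512],
        }
    ]
}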
+
+
+ def organ_fill_by_closing(data, target_label, device, close_times=2, filter_size=3, pad_value=0.0):
+     """
+     Fill holes in an organ mask using morphological closing operations.
+
+     This function performs a series of dilation and erosion operations to fill holes
+     in the organ mask identified by the target label.
+
+     Args:
+         data (ndarray): The input data containing organ labels.
+         target_label (int): The label of the organ to be processed.
+         device (str): The device to perform the operations on (e.g., 'cuda:0').
+         close_times (int, optional): Number of times to perform the closing operation. Defaults to 2.
+         filter_size (int, optional): Size of the filter for dilation and erosion. Defaults to 3.
+         pad_value (float, optional): Value used for padding in dilation and erosion. Defaults to 0.0.
+
+     Returns:
+         ndarray: Boolean mask of the filled organ.
+     """
+     mask = (data == target_label).astype(np.uint8)
+     mask = torch.from_numpy(mask).to(device)
+     for _ in range(close_times):
+         mask = dilate_one_img(mask, filter_size=filter_size, pad_value=pad_value)
+         mask = erode_one_img(mask, filter_size=filter_size, pad_value=pad_value)
+     return mask.cpu().numpy().astype(np.bool_)
+
+
+ def organ_fill_by_removed_mask(data, target_label, remove_mask, device):
+     """
+     Fill an organ mask in regions where it was previously removed.
+
+     Args:
+         data (ndarray): The input data containing organ labels.
+         target_label (int): The label of the organ to be processed.
+         remove_mask (ndarray): Boolean mask indicating regions where the organ was removed.
+         device (str): The device to perform the operations on (e.g., 'cuda:0').
+
+     Returns:
+         ndarray: Boolean mask of the filled organ in previously removed regions.
+     """
+     mask = (data == target_label).astype(np.uint8)
+     mask = dilate_one_img(torch.from_numpy(mask).to(device), filter_size=3, pad_value=0.0)
+     mask = dilate_one_img(mask, filter_size=3, pad_value=0.0)
+     roi_organ_mask = dilate_one_img(mask, filter_size=3, pad_value=0.0).cpu().numpy()
+     return (roi_organ_mask * remove_mask).astype(np.bool_)
+
+
+ def get_body_region_index_from_mask(input_mask):
+     """
+     Determine the top and bottom body region indices from an input mask.
+
+     Args:
+         input_mask (Tensor): Input mask tensor containing body region labels.
+
+     Returns:
+         tuple: Two one-hot lists representing the top and bottom body regions.
+     """
+     region_indices = {}
+     # head and neck
+     region_indices["region_0"] = [22, 120]
+     # thorax
+     region_indices["region_1"] = [28, 29, 30, 31, 32]
+     # abdomen
+     region_indices["region_2"] = [1, 2, 3, 4, 5, 14]
+     # pelvis and lower
+     region_indices["region_3"] = [93, 94]
+
+     nda = input_mask.cpu().numpy().squeeze()
+     unique_elements = list(np.unique(nda))
+     overlap_array = np.zeros(len(region_indices), dtype=np.uint8)
+     for _j in range(len(region_indices)):
+         overlap = any(element in region_indices[f"region_{_j}"] for element in unique_elements)
+         overlap_array[_j] = np.uint8(overlap)
+     overlap_array_indices = np.nonzero(overlap_array)[0]
+     top_region_index = np.eye(len(region_indices), dtype=np.uint8)[np.amin(overlap_array_indices), ...]
+     top_region_index = [int(_k) for _k in top_region_index]
+     bottom_region_index = np.eye(len(region_indices), dtype=np.uint8)[np.amax(overlap_array_indices), ...]
+     bottom_region_index = [int(_k) for _k in bottom_region_index]
+     return top_region_index, bottom_region_index
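Both return values are one-hot over the four regions; for a toy mask containing lung-lobe and abdominal labels:

import torch

mask = torch.tensor([28, 30, 1, 5]).reshape(1, 2, 2)  # lung lobes (28, 30) + liver (1), kidney (5)
top, bottom = get_body_region_index_from_mask(mask)
print(top)     # [0, 1, 0, 0]: topmost region present is the thorax
print(bottom)  # [0, 0, 1, 0]: bottommost region present is the abdomen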
+
+
+ def general_mask_generation_post_process(volume_t, target_tumor_label=None, device="cuda:0"):
+     """
+     Perform post-processing on a generated mask volume.
+
+     This function applies various refinement steps to improve the quality of the generated mask,
+     including body mask refinement, tumor prediction refinement, and organ-specific processing.
+
+     Args:
+         volume_t (ndarray): Input volume containing organ and tumor labels.
+         target_tumor_label (int, optional): Label of the target tumor. Defaults to None.
+         device (str, optional): Device to perform operations on. Defaults to "cuda:0".
+
+     Returns:
+         ndarray: Post-processed volume with refined organ and tumor labels.
+     """
+     # assume volume_t is a np array with shape (H,W,D)
+     hepatic_vessel = volume_t == 25
+     airway = volume_t == 132
+
+     # ------------ refine body mask pred
+     body_region_mask = (
+         erode_one_img(torch.from_numpy((volume_t > 0)).to(device), filter_size=3, pad_value=0.0).cpu().numpy()
+     )
+     body_region_mask, _ = supress_non_largest_components(body_region_mask, [1])
+     body_region_mask = (
+         dilate_one_img(torch.from_numpy(body_region_mask).to(device), filter_size=3, pad_value=0.0)
+         .cpu()
+         .numpy()
+         .astype(np.uint8)
+     )
+     volume_t = volume_t * body_region_mask
+
+     # ------------ refine tumor pred
+     tumor_organ_dict = {23: 28, 24: 4, 26: 1, 27: 62, 128: 200}
+     for t in [23, 24, 26, 27, 128]:
+         if t != target_tumor_label:
+             volume_t[volume_t == t] = tumor_organ_dict[t]
+         else:
+             volume_t[organ_fill_by_closing(volume_t, target_label=t, device=device)] = t
+             volume_t[organ_fill_by_closing(volume_t, target_label=t, device=device)] = t
+     # only keep the largest connected component for tumors, except hepatic tumor and bone lesion
+     if target_tumor_label != 26 and target_tumor_label != 128:
+         volume_t, _ = supress_non_largest_components(volume_t, [target_tumor_label], default_val=200)
+     target_tumor = volume_t == target_tumor_label
+
+     # ------------ remove undesired organ pred
+     # general post-process non-largest components suppression
+     # process 4 ROI organs + spleen + 2 kidneys + 5 lung lobes + duodenum + inferior vena cava
+     organ_list = [1, 4, 10, 12, 3, 28, 29, 30, 31, 32, 5, 14, 13, 6, 7, 8, 9, 10]
+     if target_tumor_label != 128:
+         organ_list += list(range(33, 60))  # + list(range(63,87))
+     data, _ = supress_non_largest_components(volume_t, organ_list, default_val=200)  # 200 is body region
+     organ_remove_mask = (volume_t - data).astype(np.bool_)
+     # process intestinal system (stomach 12, duodenum 13, small bowel 19, colon 62)
+     intestinal_mask_ = (
+         (data == 12).astype(np.uint8)
+         + (data == 13).astype(np.uint8)
+         + (data == 19).astype(np.uint8)
+         + (data == 62).astype(np.uint8)
+     )
+     intestinal_mask, _ = supress_non_largest_components(intestinal_mask_, [1], default_val=0)
+     # process small bowel 19
+     small_bowel_remove_mask = (data == 19).astype(np.uint8) - (data == 19).astype(np.uint8) * intestinal_mask
+     # process colon 62
+     colon_remove_mask = (data == 62).astype(np.uint8) - (data == 62).astype(np.uint8) * intestinal_mask
+     intestinal_remove_mask = (small_bowel_remove_mask + colon_remove_mask).astype(np.bool_)
+     data[intestinal_remove_mask] = 200
+
+     # ------------ fill corresponding organs in removed regions
+     for organ_label in organ_list:
+         data[organ_fill_by_closing(data, target_label=organ_label, device=device)] = organ_label
+
+     if target_tumor_label == 23 and np.sum(target_tumor) > 0:
+         # special process for cases with lung tumor
+         dia_lung_tumor_mask = (
+             dilate_one_img(torch.from_numpy((data == 23)).to(device), filter_size=3, pad_value=0.0).cpu().numpy()
+         )
+         tmp = (
+             (data * (dia_lung_tumor_mask.astype(np.uint8) - (data == 23).astype(np.uint8))).astype(np.float32).flatten()
+         )
+         tmp[tmp == 0] = float("nan")
+         mode = int(stats.mode(tmp.flatten(), nan_policy="omit")[0])
+         if mode in [28, 29, 30, 31, 32]:
+             dia_lung_tumor_mask = (
+                 dilate_one_img(torch.from_numpy(dia_lung_tumor_mask).to(device), filter_size=3, pad_value=0.0)
+                 .cpu()
+                 .numpy()
+             )
+             lung_remove_mask = dia_lung_tumor_mask.astype(np.uint8) - (data == 23).astype(np.uint8)
+             data[organ_fill_by_removed_mask(data, target_label=mode, remove_mask=lung_remove_mask, device=device)] = (
+                 mode
+             )
+         dia_lung_tumor_mask = (
+             dilate_one_img(torch.from_numpy(dia_lung_tumor_mask).to(device), filter_size=3, pad_value=0.0).cpu().numpy()
+         )
+         data[
+             organ_fill_by_removed_mask(
+                 data, target_label=23, remove_mask=dia_lung_tumor_mask * organ_remove_mask, device=device
+             )
+         ] = 23
+         for organ_label in [28, 29, 30, 31, 32]:
+             data[organ_fill_by_closing(data, target_label=organ_label, device=device)] = organ_label
+             data[organ_fill_by_closing(data, target_label=organ_label, device=device)] = organ_label
+             data[organ_fill_by_closing(data, target_label=organ_label, device=device)] = organ_label
+
+     if target_tumor_label == 26 and np.sum(target_tumor) > 0:
+         # special process for cases with hepatic tumor
+         # process liver 1
+         data[organ_fill_by_removed_mask(data, target_label=1, remove_mask=intestinal_remove_mask, device=device)] = 1
+         data[organ_fill_by_removed_mask(data, target_label=1, remove_mask=intestinal_remove_mask, device=device)] = 1
+         # process spleen 3
+         data[organ_fill_by_removed_mask(data, target_label=3, remove_mask=organ_remove_mask, device=device)] = 3
+         data[organ_fill_by_removed_mask(data, target_label=3, remove_mask=organ_remove_mask, device=device)] = 3
+         dia_tumor_mask = (
+             dilate_one_img(torch.from_numpy((data == target_tumor_label)).to(device), filter_size=3, pad_value=0.0)
+             .cpu()
+             .numpy()
+         )
+         dia_tumor_mask = (
+             dilate_one_img(torch.from_numpy(dia_tumor_mask).to(device), filter_size=3, pad_value=0.0).cpu().numpy()
+         )
+         data[
+             organ_fill_by_removed_mask(
+                 data, target_label=target_tumor_label, remove_mask=dia_tumor_mask * organ_remove_mask, device=device
+             )
+         ] = target_tumor_label
+         # refine hepatic tumor
+         hepatic_tumor_vessel_liver_mask_ = (
+             (data == 26).astype(np.uint8) + (data == 25).astype(np.uint8) + (data == 1).astype(np.uint8)
+         )
+         hepatic_tumor_vessel_liver_mask_ = (hepatic_tumor_vessel_liver_mask_ > 1).astype(np.uint8)
+         hepatic_tumor_vessel_liver_mask, _ = supress_non_largest_components(
+             hepatic_tumor_vessel_liver_mask_, [1], default_val=0
+         )
+         removed_region = (hepatic_tumor_vessel_liver_mask_ - hepatic_tumor_vessel_liver_mask).astype(np.bool_)
+         data[removed_region] = 200
+         target_tumor = (target_tumor * hepatic_tumor_vessel_liver_mask).astype(np.bool_)
+         # refine liver
+         data[organ_fill_by_closing(data, target_label=1, device=device)] = 1
+         data[organ_fill_by_closing(data, target_label=1, device=device)] = 1
+         data[organ_fill_by_closing(data, target_label=1, device=device)] = 1
+
+     if target_tumor_label == 27 and np.sum(target_tumor) > 0:
+         # special process for cases with colon tumor
+         dia_tumor_mask = (
+             dilate_one_img(torch.from_numpy((data == target_tumor_label)).to(device), filter_size=3, pad_value=0.0)
+             .cpu()
+             .numpy()
+         )
+         dia_tumor_mask = (
+             dilate_one_img(torch.from_numpy(dia_tumor_mask).to(device), filter_size=3, pad_value=0.0).cpu().numpy()
+         )
+         data[
+             organ_fill_by_removed_mask(
+                 data, target_label=target_tumor_label, remove_mask=dia_tumor_mask * organ_remove_mask, device=device
+             )
+         ] = target_tumor_label
+
+     if target_tumor_label == 129 and np.sum(target_tumor) > 0:
+         # special process for cases with kidney tumor
+         for organ_label in [5, 14]:
+             data[organ_fill_by_closing(data, target_label=organ_label, device=device)] = organ_label
+             data[organ_fill_by_closing(data, target_label=organ_label, device=device)] = organ_label
+             data[organ_fill_by_closing(data, target_label=organ_label, device=device)] = organ_label
+     # TODO: the current model does not support hepatic vessel by size control,
+     # so the generated hepatic vessel is treated as part of the liver for better visualization
+     print(
+         "Current model does not support hepatic vessel by size control, "
+         "so we treat generated hepatic vessel as part of liver for better visualization."
+     )
+     data[hepatic_vessel] = 1
+     data[airway] = 132
+     if target_tumor_label is not None:
+         data[target_tumor] = target_tumor_label
+
+     return data
+
+
+ class MapLabelValue:
+     """
+     Utility to map label values to another set of values.
+     For example, map [3, 2, 1] to [0, 1, 2], [1, 2, 3] -> [0.5, 1.5, 2.5], ["label3", "label2", "label1"] -> [0, 1, 2],
+     [3.5, 2.5, 1.5] -> ["label0", "label1", "label2"], etc.
+     The label data must be a numpy array or array-like data, and the output data will be a numpy array.
+     """
+
+     backend = [TransformBackends.NUMPY, TransformBackends.TORCH]
+
+     def __init__(self, orig_labels: Sequence, target_labels: Sequence, dtype: DtypeLike = np.float32) -> None:
+         """
+         Args:
+             orig_labels: original labels that map to others.
+             target_labels: expected label values, mapped 1:1 to the `orig_labels`.
+             dtype: convert the output data to dtype, default to float32.
+                 If dtype is from PyTorch, the transform will use the pytorch backend, else the numpy backend.
+         """
+         if len(orig_labels) != len(target_labels):
+             raise ValueError("orig_labels and target_labels must have the same length.")
+
+         self.orig_labels = orig_labels
+         self.target_labels = target_labels
+         self.pair = tuple((o, t) for o, t in zip(self.orig_labels, self.target_labels) if o != t)
+         type_dtype = type(dtype)
+         if getattr(type_dtype, "__module__", "") == "torch":
+             self.use_numpy = False
+             self.dtype = get_equivalent_dtype(dtype, data_type=torch.Tensor)
+         else:
+             self.use_numpy = True
+             self.dtype = get_equivalent_dtype(dtype, data_type=np.ndarray)
+
+     def __call__(self, img: NdarrayOrTensor):
+         """
+         Apply the label mapping to the input image.
+
+         Args:
+             img (NdarrayOrTensor): Input image to be remapped.
+
+         Returns:
+             NdarrayOrTensor: Remapped image.
+         """
+         if self.use_numpy:
+             img_np, *_ = convert_data_type(img, np.ndarray)
+             _out_shape = img_np.shape
+             img_flat = img_np.flatten()
+             try:
+                 out_flat = img_flat.astype(self.dtype)
+             except ValueError:
+                 # can't copy unchanged labels as the expected dtype is not supported, must map all the label values
+                 out_flat = np.zeros(shape=img_flat.shape, dtype=self.dtype)
+             for o, t in self.pair:
+                 out_flat[img_flat == o] = t
+             out_t = out_flat.reshape(_out_shape)
+         else:
+             img_t, *_ = convert_data_type(img, torch.Tensor)
+             out_t = img_t.detach().clone().to(self.dtype)  # type: ignore
+             for o, t in self.pair:
+                 out_t[img_t == o] = t
+         out, *_ = convert_to_dst_type(src=out_t, dst=img, dtype=self.dtype)
+         return out
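Usage matches the docstring's first example; with the default numpy float32 backend:

import numpy as np

mapper = MapLabelValue(orig_labels=[3, 2, 1], target_labels=[0, 1, 2])
print(mapper(np.array([[3, 2], [1, 3]])))  # [[0. 1.] [2. 0.]]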
+
+
+ def dynamic_infer(inferer, model, images):
+     """
+     Perform dynamic inference using a model and an inferer, typically a MONAI SlidingWindowInferer.
+
+     This function determines whether to use the model directly or to use the provided inferer
+     (such as a sliding window inferer) based on the size of the input images.
+
+     Args:
+         inferer: An inference object, typically a MONAI SlidingWindowInferer, which handles patch-based inference.
+         model (torch.nn.Module): The model used for inference.
+         images (torch.Tensor): The input images for inference, shape [N,C,H,W,D] or [N,C,H,W].
+
+     Returns:
+         torch.Tensor: The output from the model or the inferer, depending on the input size.
+     """
+     if torch.numel(images[0:1, 0:1, ...]) < math.prod(inferer.roi_size):
+         return model(images)
+     else:
+         # extract the spatial dimensions from the images tensor (H, W, D)
+         spatial_dims = images.shape[2:]
+         orig_roi = inferer.roi_size
+
+         # check that the ROI has the same number of dimensions as spatial_dims
+         if len(orig_roi) != len(spatial_dims):
+             raise ValueError(f"ROI length ({len(orig_roi)}) does not match spatial dimensions ({len(spatial_dims)}).")
+
+         # clamp each ROI dimension to the image size
+         adjusted_roi = [min(roi_dim, img_dim) for roi_dim, img_dim in zip(orig_roi, spatial_dims)]
+         inferer.roi_size = adjusted_roi
+         output = inferer(network=model, inputs=images)
+         inferer.roi_size = orig_roi
+         return output
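A minimal end-to-end check of the two code paths, using a toy network rather than the bundle's models:

import torch
from monai.inferers import SlidingWindowInferer

model = torch.nn.Conv3d(1, 2, kernel_size=3, padding=1)
inferer = SlidingWindowInferer(roi_size=(64, 64, 64), sw_batch_size=1, overlap=0.25)

small = torch.rand(1, 1, 32, 32, 32)  # fits inside one ROI -> direct model call
large = torch.rand(1, 1, 96, 96, 96)  # larger than the ROI -> sliding-window path
print(dynamic_infer(inferer, model, small).shape)  # torch.Size([1, 2, 32, 32, 32])
print(dynamic_infer(inferer, model, large).shape)  # torch.Size([1, 2, 96, 96, 96])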