project-monai committed on
Commit 41c525d · verified · 1 Parent(s): 3d672fd

Upload wholeBrainSeg_Large_UNEST_segmentation version 0.2.6

.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docs/3DSlicer_use.png filter=lfs diff=lfs merge=lfs -text
37
+ docs/demo.png filter=lfs diff=lfs merge=lfs -text
38
+ docs/wholebrain.png filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
configs/inference.json ADDED
@@ -0,0 +1,136 @@
1
+ {
2
+ "imports": [
3
+ "$import glob",
4
+ "$import os"
5
+ ],
6
+ "bundle_root": ".",
7
+ "output_dir": "$@bundle_root + '/eval'",
8
+ "dataset_dir": "$@bundle_root + '/dataset/images'",
9
+ "datalist": "$list(sorted(glob.glob(@dataset_dir + '/*.nii.gz')))",
10
+ "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
11
+ "network_def": {
12
+ "_target_": "scripts.networks.unest_base_patch_4.UNesT",
13
+ "in_channels": 1,
14
+ "out_channels": 133,
15
+ "patch_size": 4,
16
+ "depths": [
17
+ 2,
18
+ 2,
19
+ 8
20
+ ],
21
+ "embed_dim": [
22
+ 128,
23
+ 256,
24
+ 512
25
+ ],
26
+ "num_heads": [
27
+ 4,
28
+ 8,
29
+ 16
30
+ ]
31
+ },
32
+ "network": "$@network_def.to(@device)",
33
+ "preprocessing": {
34
+ "_target_": "Compose",
35
+ "transforms": [
36
+ {
37
+ "_target_": "LoadImaged",
38
+ "keys": "image"
39
+ },
40
+ {
41
+ "_target_": "EnsureChannelFirstd",
42
+ "keys": "image"
43
+ },
44
+ {
45
+ "_target_": "NormalizeIntensityd",
46
+ "keys": "image",
47
+ "nonzero": "True",
48
+ "channel_wise": "True"
49
+ },
50
+ {
51
+ "_target_": "EnsureTyped",
52
+ "keys": "image"
53
+ }
54
+ ]
55
+ },
56
+ "dataset": {
57
+ "_target_": "Dataset",
58
+ "data": "$[{'image': i} for i in @datalist]",
59
+ "transform": "@preprocessing"
60
+ },
61
+ "dataloader": {
62
+ "_target_": "DataLoader",
63
+ "dataset": "@dataset",
64
+ "batch_size": 1,
65
+ "shuffle": false,
66
+ "num_workers": 4
67
+ },
68
+ "inferer": {
69
+ "_target_": "SlidingWindowInferer",
70
+ "roi_size": [
71
+ 96,
72
+ 96,
73
+ 96
74
+ ],
75
+ "sw_batch_size": 4,
76
+ "overlap": 0.7
77
+ },
78
+ "postprocessing": {
79
+ "_target_": "Compose",
80
+ "transforms": [
81
+ {
82
+ "_target_": "Activationsd",
83
+ "keys": "pred",
84
+ "softmax": true
85
+ },
86
+ {
87
+ "_target_": "Invertd",
88
+ "keys": "pred",
89
+ "transform": "@preprocessing",
90
+ "orig_keys": "image",
91
+ "meta_key_postfix": "meta_dict",
92
+ "nearest_interp": false,
93
+ "to_tensor": true
94
+ },
95
+ {
96
+ "_target_": "AsDiscreted",
97
+ "keys": "pred",
98
+ "argmax": true
99
+ },
100
+ {
101
+ "_target_": "SaveImaged",
102
+ "keys": "pred",
103
+ "meta_keys": "pred_meta_dict",
104
+ "output_dir": "@output_dir"
105
+ }
106
+ ]
107
+ },
108
+ "handlers": [
109
+ {
110
+ "_target_": "CheckpointLoader",
111
+ "load_path": "$@bundle_root + '/models/model.pt'",
112
+ "load_dict": {
113
+ "model": "@network"
114
+ },
115
+ "strict": "True"
116
+ },
117
+ {
118
+ "_target_": "StatsHandler",
119
+ "iteration_log": false
120
+ }
121
+ ],
122
+ "evaluator": {
123
+ "_target_": "SupervisedEvaluator",
124
+ "device": "@device",
125
+ "val_data_loader": "@dataloader",
126
+ "network": "@network",
127
+ "inferer": "@inferer",
128
+ "postprocessing": "@postprocessing",
129
+ "val_handlers": "@handlers",
130
+ "amp": false
131
+ },
132
+ "evaluating": [
133
+ "$setattr(torch.backends.cudnn, 'benchmark', True)",
134
+ "[email protected]()"
135
+ ]
136
+ }
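
The inference config above declaratively wires together the preprocessing chain, a SlidingWindowInferer (96^3 ROI, 0.7 overlap), the post-processing chain, and a SupervisedEvaluator. Besides the `monai.bundle run` command shown in the README, the same file can be driven programmatically; the following is a minimal sketch under stated assumptions (MONAI installed, the bundle root as the working directory and on PYTHONPATH so `scripts.*` resolves, and a checkpoint at models/model.pt), not part of the bundle itself.

```
# Sketch: run configs/inference.json programmatically via monai.bundle.ConfigParser.
from monai.bundle import ConfigParser

parser = ConfigParser()
parser.read_config("configs/inference.json")
parser["bundle_root"] = "."                          # override entries before instantiation
evaluator = parser.get_parsed_content("evaluator")   # builds network, dataloader, inferer, handlers
evaluator.run()                                      # CheckpointLoader restores models/model.pt, then sliding-window inference runs
```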
configs/logging.conf ADDED
@@ -0,0 +1,21 @@
1
+ [loggers]
2
+ keys=root
3
+
4
+ [handlers]
5
+ keys=consoleHandler
6
+
7
+ [formatters]
8
+ keys=fullFormatter
9
+
10
+ [logger_root]
11
+ level=INFO
12
+ handlers=consoleHandler
13
+
14
+ [handler_consoleHandler]
15
+ class=StreamHandler
16
+ level=INFO
17
+ formatter=fullFormatter
18
+ args=(sys.stdout,)
19
+
20
+ [formatter_fullFormatter]
21
+ format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
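
This is a standard Python logging fileConfig that `monai.bundle run` applies when passed via `--logging_file`. A small sketch of loading it directly outside the bundle runner (assuming it is saved at configs/logging.conf):

```
# Configure console logging from the file above without going through monai.bundle.
import logging
import logging.config

logging.config.fileConfig("configs/logging.conf", disable_existing_loggers=False)
logging.getLogger("wholeBrainSeg").info("console logging configured at INFO level")
```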
configs/metadata.json ADDED
@@ -0,0 +1,223 @@
1
+ {
2
+ "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20240725.json",
3
+ "version": "0.2.6",
4
+ "changelog": {
5
+ "0.2.6": "update to huggingface hosting",
6
+ "0.2.5": "update large files",
7
+ "0.2.4": "fix black 24.1 format error",
8
+ "0.2.3": "fix PYTHONPATH in readme.md",
9
+ "0.2.2": "add name tag",
10
+ "0.2.1": "fix license Copyright error",
11
+ "0.2.0": "update license files",
12
+ "0.1.2": "Add training support for whole brain segmentation, users can use active learning in the MONAI Label",
13
+ "0.1.1": "Fix dimension according to MONAI 1.0 and fix readme file",
14
+ "0.1.0": "complete the model package"
15
+ },
16
+ "monai_version": "1.4.0",
17
+ "pytorch_version": "2.4.0",
18
+ "numpy_version": "1.24.4",
19
+ "required_packages_version": {
20
+ "nibabel": "5.2.1",
21
+ "pytorch-ignite": "0.4.11",
22
+ "einops": "0.7.0",
23
+ "fire": "0.6.0",
24
+ "timm": "0.6.7",
25
+ "torchvision": "0.19.0",
26
+ "tensorboard": "2.17.0"
27
+ },
28
+ "supported_apps": {},
29
+ "name": "Whole brain large UNEST segmentation",
30
+ "task": "Whole Brain Segmentation",
31
+ "description": "A 3D transformer-based model for whole brain segmentation from T1W MRI image",
32
+ "authors": "Vanderbilt University + MONAI team",
33
+ "copyright": "Copyright (c) MONAI Consortium",
34
+ "data_source": "",
35
+ "data_type": "nibabel",
36
+ "image_classes": "single channel data, intensity scaled to [0, 1]",
37
+ "label_classes": "133 Classes",
38
+ "pred_classes": "133 Classes",
39
+ "eval_metrics": {
40
+ "mean_dice": 0.71
41
+ },
42
+ "intended_use": "This is an example, not to be used for diagnostic purposes",
43
+ "references": [
44
+ "Xin, et al. Characterizing Renal Structures with 3D Block Aggregate Transformers. arXiv preprint arXiv:2203.02430 (2022). https://arxiv.org/pdf/2203.02430.pdf"
45
+ ],
46
+ "network_data_format": {
47
+ "inputs": {
48
+ "image": {
49
+ "type": "image",
50
+ "format": "hounsfield",
51
+ "modality": "MRI",
52
+ "num_channels": 1,
53
+ "spatial_shape": [
54
+ 96,
55
+ 96,
56
+ 96
57
+ ],
58
+ "dtype": "float32",
59
+ "value_range": [
60
+ 0,
61
+ 1
62
+ ],
63
+ "is_patch_data": true,
64
+ "channel_def": {
65
+ "0": "image"
66
+ }
67
+ }
68
+ },
69
+ "outputs": {
70
+ "pred": {
71
+ "type": "image",
72
+ "format": "segmentation",
73
+ "num_channels": 133,
74
+ "spatial_shape": [
75
+ 96,
76
+ 96,
77
+ 96
78
+ ],
79
+ "dtype": "float32",
80
+ "value_range": [
81
+ 0,
82
+ 1
83
+ ],
84
+ "is_patch_data": true,
85
+ "channel_def": {
86
+ "0": "background",
87
+ "1": "3rd-Ventricle",
88
+ "2": "4th-Ventricle",
89
+ "3": "Right-Accumbens-Area",
90
+ "4": "Left-Accumbens-Area",
91
+ "5": "Right-Amygdala",
92
+ "6": "Left-Amygdala",
93
+ "7": "Brain-Stem",
94
+ "8": "Right-Caudate",
95
+ "9": "Left-Caudate",
96
+ "10": "Right-Cerebellum-Exterior",
97
+ "11": "Left-Cerebellum-Exterior",
98
+ "12": "Right-Cerebellum-White-Matter",
99
+ "13": "Left-Cerebellum-White-Matter",
100
+ "14": "Right-Cerebral-White-Matter",
101
+ "15": "Left-Cerebral-White-Matter",
102
+ "16": "Right-Hippocampus",
103
+ "17": "Left-Hippocampus",
104
+ "18": "Right-Inf-Lat-Vent",
105
+ "19": "Left-Inf-Lat-Vent",
106
+ "20": "Right-Lateral-Ventricle",
107
+ "21": "Left-Lateral-Ventricle",
108
+ "22": "Right-Pallidum",
109
+ "23": "Left-Pallidum",
110
+ "24": "Right-Putamen",
111
+ "25": "Left-Putamen",
112
+ "26": "Right-Thalamus-Proper",
113
+ "27": "Left-Thalamus-Proper",
114
+ "28": "Right-Ventral-DC",
115
+ "29": "Left-Ventral-DC",
116
+ "30": "Cerebellar-Vermal-Lobules-I-V",
117
+ "31": "Cerebellar-Vermal-Lobules-VI-VII",
118
+ "32": "Cerebellar-Vermal-Lobules-VIII-X",
119
+ "33": "Left-Basal-Forebrain",
120
+ "34": "Right-Basal-Forebrain",
121
+ "35": "Right-ACgG--anterior-cingulate-gyrus",
122
+ "36": "Left-ACgG--anterior-cingulate-gyrus",
123
+ "37": "Right-AIns--anterior-insula",
124
+ "38": "Left-AIns--anterior-insula",
125
+ "39": "Right-AOrG--anterior-orbital-gyrus",
126
+ "40": "Left-AOrG--anterior-orbital-gyrus",
127
+ "41": "Right-AnG---angular-gyrus",
128
+ "42": "Left-AnG---angular-gyrus",
129
+ "43": "Right-Calc--calcarine-cortex",
130
+ "44": "Left-Calc--calcarine-cortex",
131
+ "45": "Right-CO----central-operculum",
132
+ "46": "Left-CO----central-operculum",
133
+ "47": "Right-Cun---cuneus",
134
+ "48": "Left-Cun---cuneus",
135
+ "49": "Right-Ent---entorhinal-area",
136
+ "50": "Left-Ent---entorhinal-area",
137
+ "51": "Right-FO----frontal-operculum",
138
+ "52": "Left-FO----frontal-operculum",
139
+ "53": "Right-FRP---frontal-pole",
140
+ "54": "Left-FRP---frontal-pole",
141
+ "55": "Right-FuG---fusiform-gyrus ",
142
+ "56": "Left-FuG---fusiform-gyrus",
143
+ "57": "Right-GRe---gyrus-rectus",
144
+ "58": "Left-GRe---gyrus-rectus",
145
+ "59": "Right-IOG---inferior-occipital-gyrus",
146
+ "60": "Left-IOG---inferior-occipital-gyrus",
147
+ "61": "Right-ITG---inferior-temporal-gyrus",
148
+ "62": "Left-ITG---inferior-temporal-gyrus",
149
+ "63": "Right-LiG---lingual-gyrus",
150
+ "64": "Left-LiG---lingual-gyrus",
151
+ "65": "Right-LOrG--lateral-orbital-gyrus",
152
+ "66": "Left-LOrG--lateral-orbital-gyrus",
153
+ "67": "Right-MCgG--middle-cingulate-gyrus",
154
+ "68": "Left-MCgG--middle-cingulate-gyrus",
155
+ "69": "Right-MFC---medial-frontal-cortex",
156
+ "70": "Left-MFC---medial-frontal-cortex",
157
+ "71": "Right-MFG---middle-frontal-gyrus",
158
+ "72": "Left-MFG---middle-frontal-gyrus",
159
+ "73": "Right-MOG---middle-occipital-gyrus",
160
+ "74": "Left-MOG---middle-occipital-gyrus",
161
+ "75": "Right-MOrG--medial-orbital-gyrus",
162
+ "76": "Left-MOrG--medial-orbital-gyrus",
163
+ "77": "Right-MPoG--postcentral-gyrus",
164
+ "78": "Left-MPoG--postcentral-gyrus",
165
+ "79": "Right-MPrG--precentral-gyrus",
166
+ "80": "Left-MPrG--precentral-gyrus",
167
+ "81": "Right-MSFG--superior-frontal-gyrus",
168
+ "82": "Left-MSFG--superior-frontal-gyrus",
169
+ "83": "Right-MTG---middle-temporal-gyrus",
170
+ "84": "Left-MTG---middle-temporal-gyrus",
171
+ "85": "Right-OCP---occipital-pole",
172
+ "86": "Left-OCP---occipital-pole",
173
+ "87": "Right-OFuG--occipital-fusiform-gyrus",
174
+ "88": "Left-OFuG--occipital-fusiform-gyrus",
175
+ "89": "Right-OpIFG-opercular-part-of-the-IFG",
176
+ "90": "Left-OpIFG-opercular-part-of-the-IFG",
177
+ "91": "Right-OrIFG-orbital-part-of-the-IFG",
178
+ "92": "Left-OrIFG-orbital-part-of-the-IFG",
179
+ "93": "Right-PCgG--posterior-cingulate-gyrus",
180
+ "94": "Left-PCgG--posterior-cingulate-gyrus",
181
+ "95": "Right-PCu---precuneus",
182
+ "96": "Left-PCu---precuneus",
183
+ "97": "Right-PHG---parahippocampal-gyrus",
184
+ "98": "Left-PHG---parahippocampal-gyrus",
185
+ "99": "Right-PIns--posterior-insula",
186
+ "100": "Left-PIns--posterior-insula",
187
+ "101": "Right-PO----parietal-operculum",
188
+ "102": "Left-PO----parietal-operculum",
189
+ "103": "Right-PoG---postcentral-gyrus",
190
+ "104": "Left-PoG---postcentral-gyrus",
191
+ "105": "Right-POrG--posterior-orbital-gyrus",
192
+ "106": "Left-POrG--posterior-orbital-gyrus",
193
+ "107": "Right-PP----planum-polare",
194
+ "108": "Left-PP----planum-polare",
195
+ "109": "Right-PrG---precentral-gyrus",
196
+ "110": "Left-PrG---precentral-gyrus",
197
+ "111": "Right-PT----planum-temporale",
198
+ "112": "Left-PT----planum-temporale",
199
+ "113": "Right-SCA---subcallosal-area",
200
+ "114": "Left-SCA---subcallosal-area",
201
+ "115": "Right-SFG---superior-frontal-gyrus",
202
+ "116": "Left-SFG---superior-frontal-gyrus",
203
+ "117": "Right-SMC---supplementary-motor-cortex",
204
+ "118": "Left-SMC---supplementary-motor-cortex",
205
+ "119": "Right-SMG---supramarginal-gyrus",
206
+ "120": "Left-SMG---supramarginal-gyrus",
207
+ "121": "Right-SOG---superior-occipital-gyrus",
208
+ "122": "Left-SOG---superior-occipital-gyrus",
209
+ "123": "Right-SPL---superior-parietal-lobule",
210
+ "124": "Left-SPL---superior-parietal-lobule",
211
+ "125": "Right-STG---superior-temporal-gyrus",
212
+ "126": "Left-STG---superior-temporal-gyrus",
213
+ "127": "Right-TMP---temporal-pole",
214
+ "128": "Left-TMP---temporal-pole",
215
+ "129": "Right-TrIFG-triangular-part-of-the-IFG",
216
+ "130": "Left-TrIFG-triangular-part-of-the-IFG",
217
+ "131": "Right-TTG---transverse-temporal-gyrus",
218
+ "132": "Left-TTG---transverse-temporal-gyrus"
219
+ }
220
+ }
221
+ }
222
+ }
223
+ }
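
metadata.json is what `monai.bundle` uses to describe the model: version, framework and package requirements, and the data contract (single-channel 96x96x96 T1w patches in, 133-channel predictions out). A small sanity-check sketch, assuming the file is saved at configs/metadata.json:

```
# Read the bundle metadata and verify the declared output contract.
import json

with open("configs/metadata.json") as f:
    meta = json.load(f)

pred = meta["network_data_format"]["outputs"]["pred"]
assert pred["num_channels"] == len(pred["channel_def"]) == 133  # background + 132 structures
print(meta["name"], meta["version"], "requires MONAI", meta["monai_version"])
```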
configs/multi_gpu_train.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "device": "$torch.device(f'cuda:{dist.get_rank()}')",
3
+ "network": {
4
+ "_target_": "torch.nn.parallel.DistributedDataParallel",
5
+ "module": "$@network_def.to(@device)",
6
+ "device_ids": [
7
+ "@device"
8
+ ]
9
+ },
10
+ "train#sampler": {
11
+ "_target_": "DistributedSampler",
12
+ "dataset": "@train#dataset",
13
+ "even_divisible": true,
14
+ "shuffle": true
15
+ },
16
+ "train#dataloader#sampler": "@train#sampler",
17
+ "train#dataloader#shuffle": false,
18
+ "train#trainer#train_handlers": "$@train#handlers[: -2 if dist.get_rank() > 0 else None]",
19
+ "validate#sampler": {
20
+ "_target_": "DistributedSampler",
21
+ "dataset": "@validate#dataset",
22
+ "even_divisible": false,
23
+ "shuffle": false
24
+ },
25
+ "validate#dataloader#sampler": "@validate#sampler",
26
+ "validate#evaluator#val_handlers": "$None if dist.get_rank() > 0 else @validate#handlers",
27
+ "training": [
28
+ "$import torch.distributed as dist",
29
+ "$dist.init_process_group(backend='nccl')",
30
+ "$torch.cuda.set_device(@device)",
31
+ "$monai.utils.set_determinism(seed=123)",
32
+ "$setattr(torch.backends.cudnn, 'benchmark', True)",
33
+ "$@train#trainer.run()",
34
+ "$dist.destroy_process_group()"
35
+ ]
36
+ }
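
multi_gpu_train.json only overrides entries from train.json: it wraps the network in DistributedDataParallel, attaches DistributedSamplers to the train/validation dataloaders, and restricts logging and checkpoint handlers to rank 0. A typical launch command, following the usual MONAI bundle pattern (shown here for 2 GPUs on a single node; adjust `--nproc_per_node` to your hardware):

```
torchrun --standalone --nnodes=1 --nproc_per_node=2 -m monai.bundle run training \
    --meta_file configs/metadata.json \
    --config_file "['configs/train.json','configs/multi_gpu_train.json']" \
    --logging_file configs/logging.conf
```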
configs/train.json ADDED
@@ -0,0 +1,299 @@
1
+ {
2
+ "imports": [
3
+ "$import glob",
4
+ "$import os",
5
+ "$import ignite"
6
+ ],
7
+ "bundle_root": ".",
8
+ "ckpt_dir": "$@bundle_root + '/models'",
9
+ "output_dir": "$@bundle_root + '/eval'",
10
+ "dataset_dir": "$@bundle_root + '/dataset/brain'",
11
+ "images": "$list(sorted(glob.glob(@dataset_dir + '/images/*.nii.gz')))",
12
+ "labels": "$list(sorted(glob.glob(@dataset_dir + '/labels/*.nii.gz')))",
13
+ "val_interval": 5,
14
+ "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
15
+ "network_def": {
16
+ "_target_": "scripts.networks.unest_base_patch_4.UNesT",
17
+ "in_channels": 1,
18
+ "out_channels": 133,
19
+ "patch_size": 4,
20
+ "depths": [
21
+ 2,
22
+ 2,
23
+ 8
24
+ ],
25
+ "embed_dim": [
26
+ 128,
27
+ 256,
28
+ 512
29
+ ],
30
+ "num_heads": [
31
+ 4,
32
+ 8,
33
+ 16
34
+ ]
35
+ },
36
+ "network": "$@network_def.to(@device)",
37
+ "loss": {
38
+ "_target_": "DiceCELoss",
39
+ "to_onehot_y": true,
40
+ "softmax": true,
41
+ "squared_pred": true,
42
+ "batch": true
43
+ },
44
+ "optimizer": {
45
+ "_target_": "torch.optim.Adam",
46
+ "params": "$@network.parameters()",
47
+ "lr": 0.0001
48
+ },
49
+ "train": {
50
+ "deterministic_transforms": [
51
+ {
52
+ "_target_": "LoadImaged",
53
+ "keys": [
54
+ "image",
55
+ "label"
56
+ ]
57
+ },
58
+ {
59
+ "_target_": "EnsureChannelFirstd",
60
+ "keys": [
61
+ "image",
62
+ "label"
63
+ ]
64
+ },
65
+ {
66
+ "_target_": "EnsureTyped",
67
+ "keys": [
68
+ "image",
69
+ "label"
70
+ ]
71
+ }
72
+ ],
73
+ "random_transforms": [
74
+ {
75
+ "_target_": "RandSpatialCropd",
76
+ "keys": [
77
+ "image",
78
+ "label"
79
+ ],
80
+ "roi_size": [
81
+ 96,
82
+ 96,
83
+ 96
84
+ ],
85
+ "random_size": false
86
+ },
87
+ {
88
+ "_target_": "RandFlipd",
89
+ "keys": [
90
+ "image",
91
+ "label"
92
+ ],
93
+ "spatial_axis": [
94
+ 0
95
+ ],
96
+ "prob": 0.1
97
+ },
98
+ {
99
+ "_target_": "RandFlipd",
100
+ "keys": [
101
+ "image",
102
+ "label"
103
+ ],
104
+ "spatial_axis": [
105
+ 1
106
+ ],
107
+ "prob": 0.1
108
+ },
109
+ {
110
+ "_target_": "RandFlipd",
111
+ "keys": [
112
+ "image",
113
+ "label"
114
+ ],
115
+ "spatial_axis": [
116
+ 2
117
+ ],
118
+ "prob": 0.1
119
+ },
120
+ {
121
+ "_target_": "RandRotate90d",
122
+ "keys": [
123
+ "image",
124
+ "label"
125
+ ],
126
+ "max_k": 3,
127
+ "prob": 0.1
128
+ },
129
+ {
130
+ "_target_": "NormalizeIntensityd",
131
+ "keys": "image",
132
+ "nonzero": true,
133
+ "channel_wise": true
134
+ }
135
+ ],
136
+ "preprocessing": {
137
+ "_target_": "Compose",
138
+ "transforms": "$@train#deterministic_transforms + @train#random_transforms"
139
+ },
140
+ "dataset": {
141
+ "_target_": "CacheDataset",
142
+ "data": "$[{'image': i, 'label': l} for i, l in zip(@images[:-2], @labels[:-2])]",
143
+ "transform": "@train#preprocessing",
144
+ "cache_rate": 1.0,
145
+ "num_workers": 2
146
+ },
147
+ "dataloader": {
148
+ "_target_": "DataLoader",
149
+ "dataset": "@train#dataset",
150
+ "batch_size": 1,
151
+ "shuffle": true,
152
+ "num_workers": 1
153
+ },
154
+ "inferer": {
155
+ "_target_": "SimpleInferer"
156
+ },
157
+ "postprocessing": {
158
+ "_target_": "Compose",
159
+ "transforms": [
160
+ {
161
+ "_target_": "Activationsd",
162
+ "keys": "pred",
163
+ "softmax": true
164
+ },
165
+ {
166
+ "_target_": "AsDiscreted",
167
+ "keys": [
168
+ "pred",
169
+ "label"
170
+ ],
171
+ "argmax": [
172
+ true,
173
+ false
174
+ ],
175
+ "to_onehot": 133
176
+ }
177
+ ]
178
+ },
179
+ "handlers": [
180
+ {
181
+ "_target_": "ValidationHandler",
182
+ "validator": "@validate#evaluator",
183
+ "epoch_level": true,
184
+ "interval": "@val_interval"
185
+ },
186
+ {
187
+ "_target_": "StatsHandler",
188
+ "tag_name": "train_loss",
189
+ "output_transform": "$monai.handlers.from_engine(['loss'], first=True)"
190
+ },
191
+ {
192
+ "_target_": "TensorBoardStatsHandler",
193
+ "log_dir": "@output_dir",
194
+ "tag_name": "train_loss",
195
+ "output_transform": "$monai.handlers.from_engine(['loss'], first=True)"
196
+ }
197
+ ],
198
+ "key_metric": {
199
+ "train_accuracy": {
200
+ "_target_": "ignite.metrics.Accuracy",
201
+ "output_transform": "$monai.handlers.from_engine(['pred', 'label'])"
202
+ }
203
+ },
204
+ "trainer": {
205
+ "_target_": "SupervisedTrainer",
206
+ "max_epochs": 2000,
207
+ "device": "@device",
208
+ "train_data_loader": "@train#dataloader",
209
+ "network": "@network",
210
+ "loss_function": "@loss",
211
+ "optimizer": "@optimizer",
212
+ "inferer": "@train#inferer",
213
+ "postprocessing": "@train#postprocessing",
214
+ "key_train_metric": "@train#key_metric",
215
+ "train_handlers": "@train#handlers",
216
+ "amp": true
217
+ }
218
+ },
219
+ "validate": {
220
+ "preprocessing": {
221
+ "_target_": "Compose",
222
+ "transforms": "%train#deterministic_transforms"
223
+ },
224
+ "dataset": {
225
+ "_target_": "CacheDataset",
226
+ "data": "$[{'image': i, 'label': l} for i, l in zip(@images[-2:], @labels[-2:])]",
227
+ "transform": "@validate#preprocessing",
228
+ "cache_rate": 1.0
229
+ },
230
+ "dataloader": {
231
+ "_target_": "DataLoader",
232
+ "dataset": "@validate#dataset",
233
+ "batch_size": 2,
234
+ "shuffle": false,
235
+ "num_workers": 1
236
+ },
237
+ "inferer": {
238
+ "_target_": "SlidingWindowInferer",
239
+ "roi_size": [
240
+ 96,
241
+ 96,
242
+ 96
243
+ ],
244
+ "sw_batch_size": 4,
245
+ "overlap": 0.5
246
+ },
247
+ "postprocessing": "%train#postprocessing",
248
+ "handlers": [
249
+ {
250
+ "_target_": "StatsHandler",
251
+ "iteration_log": false
252
+ },
253
+ {
254
+ "_target_": "TensorBoardStatsHandler",
255
+ "log_dir": "@output_dir",
256
+ "iteration_log": false
257
+ },
258
+ {
259
+ "_target_": "CheckpointSaver",
260
+ "save_dir": "@ckpt_dir",
261
+ "save_dict": {
262
+ "model": "@network"
263
+ },
264
+ "save_key_metric": true,
265
+ "key_metric_filename": "model.pt"
266
+ }
267
+ ],
268
+ "key_metric": {
269
+ "val_mean_dice": {
270
+ "_target_": "MeanDice",
271
+ "include_background": false,
272
+ "output_transform": "$monai.handlers.from_engine(['pred', 'label'])"
273
+ }
274
+ },
275
+ "additional_metrics": {
276
+ "val_accuracy": {
277
+ "_target_": "ignite.metrics.Accuracy",
278
+ "output_transform": "$monai.handlers.from_engine(['pred', 'label'])"
279
+ }
280
+ },
281
+ "evaluator": {
282
+ "_target_": "SupervisedEvaluator",
283
+ "device": "@device",
284
+ "val_data_loader": "@validate#dataloader",
285
+ "network": "@network",
286
+ "inferer": "@validate#inferer",
287
+ "postprocessing": "@validate#postprocessing",
288
+ "key_val_metric": "@validate#key_metric",
289
+ "additional_metrics": "@validate#additional_metrics",
290
+ "val_handlers": "@validate#handlers",
291
+ "amp": true
292
+ }
293
+ },
294
+ "training": [
295
+ "$monai.utils.set_determinism(seed=123)",
296
+ "$setattr(torch.backends.cudnn, 'benchmark', True)",
297
+ "$@train#trainer.run()"
298
+ ]
299
+ }
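
Paths such as `bundle_root` and `dataset_dir` are plain top-level entries in train.json, so they can be overridden from the command line rather than by editing the file. A hypothetical example (the paths are placeholders):

```
python -m monai.bundle run training \
    --meta_file configs/metadata.json \
    --config_file configs/train.json \
    --logging_file configs/logging.conf \
    --bundle_root . \
    --dataset_dir ./dataset/brain
```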
docs/3DSlicer_use.png ADDED

Git LFS Details

  • SHA256: d6fe205d20ef8895b8ac2420e5c7682091ce11341027846e93b735c97ceba6b2
  • Pointer size: 131 Bytes
  • Size of remote file: 609 kB
docs/README.md ADDED
@@ -0,0 +1,189 @@
1
+ # Description
2
+ Detailed whole brain segmentation is an essential quantitative technique in medical image analysis; it provides a non-invasive way of measuring brain regions from clinically acquired structural magnetic resonance imaging (MRI).
3
+ We provide a pre-trained model for training and inference of whole brain segmentation with 133 structures.
4
+ A training pipeline is provided to support active learning in MONAI Label and training with the bundle.
5
+
6
+ A tutorial and model release for whole brain segmentation using the 3D transformer-based segmentation model UNEST.
7
+
8
+ Authors:
9
+ Xin Yu ([email protected])
10
+
11
+ Yinchi Zhou ([email protected]) | Yucheng Tang ([email protected])
12
+
13
+ <p align="center">
14
+ -------------------------------------------------------------------------------------
15
+ </p>
16
+
17
+ ![](./demo.png) <br>
18
+ <p align="center">
19
+ Fig.1 - Demonstration of T1w MRI images registered in the MNI space and the whole brain segmentation labels with 133 classes</p>
20
+
21
+
22
+
23
+ # Model Overview
24
+ A pre-trained UNEST base model [1] for volumetric (3D) whole brain segmentation with T1w MR images.
25
+ To leverage information across embedded sequences, "shifted window" transformers
26
+ were proposed for dense prediction and multi-scale feature modeling. However, these
27
+ attempts to widen the self-attention range often incur high computational
28
+ complexity and data inefficiency. Inspired by the aggregation function in the nested
29
+ ViT, we propose a new design for a 3D U-shaped medical segmentation model with
30
+ Nested Transformers (UNesT), built hierarchically around the 3D block aggregation function,
31
+ which learns locality behaviors for small structures or small datasets. This design retains
32
+ the original global self-attention mechanism and achieves information communication
33
+ across patches by stacking transformer encoders hierarchically.
34
+
35
+ ![](./unest.png) <br>
36
+ <p align="center">
37
+ Fig.2 - The network architecture of UNEST Base model
38
+ </p>
39
+
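
For reference, the network definition used throughout this bundle (see configs/train.json and configs/inference.json) corresponds to the instantiation below; this is a sketch that assumes the bundle root is on PYTHONPATH so the bundled `scripts` package is importable.

```
# Instantiate the bundled UNesT base model with the same arguments as the configs.
import torch
from scripts.networks.unest_base_patch_4 import UNesT

model = UNesT(
    in_channels=1,
    out_channels=133,
    patch_size=4,
    depths=[2, 2, 8],
    embed_dim=[128, 256, 512],
    num_heads=[4, 8, 16],
)
with torch.no_grad():
    logits = model(torch.zeros(1, 1, 96, 96, 96))  # one 96^3 T1w patch
print(logits.shape)  # expected: torch.Size([1, 133, 96, 96, 96])
```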
40
+
41
+ ## Data
42
+ The training data is from Vanderbilt University and Vanderbilt University Medical Center, together with the publicly released OASIS and CANDI datasets.
43
+ Training and testing data are MRI T1-weighted (T1w) 3D volumes coming from 3 different sites. There are a total of 133 classes in the whole brain segmentation task.
44
+ Among 50 T1w MRI scans from the Open Access Series of Imaging Studies (OASIS) (Marcus et al., 2007) dataset, 45 scans are used for training and the other 5 for validation.
45
+ The testing cohort contains the Colin27 T1w scan (Aubert-Broche et al., 2006) and 13 T1w MRI scans from the Child and Adolescent Neuro Development Initiative (CANDI)
46
+ (Kennedy et al., 2012). All data are registered to the MNI space using the MNI305 (Evans et al., 1993) template and preprocessed following the method in (Huo et al., 2019). Input images are randomly cropped to the size of 96 × 96 × 96.
47
+
48
+ ### Important
49
+
50
+ The brain MRI images used for training are affinely registered from the target image to the MNI305 template using NiftyReg.
51
+ The data should be in the MNI305 space before inference.
52
+
53
+ If your images are already in MNI space, skip the registration step.
54
+
55
+ You can use any registration tool to register the images to MNI space. Here is an example using ANTs.
56
+ Registration to MNI space (sample suggestion): e.g., use ANTs or other tools to register a T1 MRI image to the MNI305 space.
57
+
58
+ ```
59
+ pip install antspyx
60
+
61
+ # Sample ANTs affine registration to the MNI305 template
62
+
63
+ import ants
64
+ import sys
65
+ import os
66
+
67
+ fixed_image = ants.image_read('<fixed_image_path>')
68
+ moving_image = ants.image_read('<moving_image_path>')
69
+ transform = ants.registration(fixed_image, moving_image, 'Affine')
70
+
71
+ reg3t = ants.apply_transforms(fixed_image, moving_image, transform['fwdtransforms'][0])
72
+ ants.image_write(reg3t, '<output_image_path>')
73
+ ```
74
+
75
+ ## Training configuration
76
+ Training and inference were performed with at least one 24 GB GPU.
77
+
78
+ Actual Model Input: 96 x 96 x 96
79
+
80
+ ## Input and output formats
81
+ Input: 1-channel T1w MRI image in MNI305 space.
82
+
83
+ Output: 133-channel segmentation (one channel per label, including background); argmax is applied in post-processing to produce the final label map.
84
+ ## Commands example
85
+ Download the trained checkpoint to ./models/model.pt:
86
+
87
+
88
+ Add the scripts component: to run the workflow with customized components, PYTHONPATH should be revised to include the path to the customized components:
89
+
90
+ ```
91
+ export PYTHONPATH=$PYTHONPATH:'<path to the bundle root dir>/'
92
+ ```
93
+
94
+ Execute Training:
95
+
96
+ ```
97
+ python -m monai.bundle run training --meta_file configs/metadata.json --config_file configs/train.json --logging_file configs/logging.conf
98
+ ```
99
+
100
+ Execute inference:
101
+
102
+ ```
103
+ python -m monai.bundle run evaluating --meta_file configs/metadata.json --config_file configs/inference.json --logging_file configs/logging.conf
104
+ ```
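
Entries such as `dataset_dir` and `output_dir` can also be overridden on the command line instead of editing configs/inference.json, for example (the input path is a placeholder):

```
python -m monai.bundle run evaluating \
    --meta_file configs/metadata.json \
    --config_file configs/inference.json \
    --logging_file configs/logging.conf \
    --dataset_dir '<path to MNI-registered T1w images>' \
    --output_dir ./eval
```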
105
+
106
+
107
+ ## More example outputs
108
+ ![](./wholebrain.png) <br>
109
+ <p align="center">
110
+ Fig.3 - The output prediction compared with a model variant and the ground truth
111
+ </p>
112
+
113
+ ## Training/Validation Benchmarking
114
+ A graph showing the training accuracy when fine-tuning for 600 epochs.
115
+
116
+ ![](./training.png) <br>
117
+
118
+ With 10 fine-tuned labels, the training process converges quickly.
119
+
120
+ ## Complete ROI of the whole brain segmentation
121
+ 133 brain structures are segmented.
122
+
123
+ | #1 | #2 | #3 | #4 |
124
+ | :------------ | :---------- | :-------- | :-------- |
125
+ | 0: background | 1 : 3rd-Ventricle | 2 : 4th-Ventricle | 3 : Right-Accumbens-Area |
126
+ | 4 : Left-Accumbens-Area | 5 : Right-Amygdala | 6 : Left-Amygdala | 7 : Brain-Stem |
127
+ | 8 : Right-Caudate | 9 : Left-Caudate | 10 : Right-Cerebellum-Exterior | 11 : Left-Cerebellum-Exterior |
128
+ | 12 : Right-Cerebellum-White-Matter | 13 : Left-Cerebellum-White-Matter | 14 : Right-Cerebral-White-Matter | 15 : Left-Cerebral-White-Matter |
129
+ | 16 : Right-Hippocampus | 17 : Left-Hippocampus | 18 : Right-Inf-Lat-Vent | 19 : Left-Inf-Lat-Vent |
130
+ | 20 : Right-Lateral-Ventricle | 21 : Left-Lateral-Ventricle | 22 : Right-Pallidum | 23 : Left-Pallidum |
131
+ | 24 : Right-Putamen | 25 : Left-Putamen | 26 : Right-Thalamus-Proper | 27 : Left-Thalamus-Proper |
132
+ | 28 : Right-Ventral-DC | 29 : Left-Ventral-DC | 30 : Cerebellar-Vermal-Lobules-I-V | 31 : Cerebellar-Vermal-Lobules-VI-VII |
133
+ | 32 : Cerebellar-Vermal-Lobules-VIII-X | 33 : Left-Basal-Forebrain | 34 : Right-Basal-Forebrain | 35 : Right-ACgG--anterior-cingulate-gyrus |
134
+ | 36 : Left-ACgG--anterior-cingulate-gyrus | 37 : Right-AIns--anterior-insula | 38 : Left-AIns--anterior-insula | 39 : Right-AOrG--anterior-orbital-gyrus |
135
+ | 40 : Left-AOrG--anterior-orbital-gyrus | 41 : Right-AnG---angular-gyrus | 42 : Left-AnG---angular-gyrus | 43 : Right-Calc--calcarine-cortex |
136
+ | 44 : Left-Calc--calcarine-cortex | 45 : Right-CO----central-operculum | 46 : Left-CO----central-operculum | 47 : Right-Cun---cuneus |
137
+ | 48 : Left-Cun---cuneus | 49 : Right-Ent---entorhinal-area | 50 : Left-Ent---entorhinal-area | 51 : Right-FO----frontal-operculum |
138
+ | 52 : Left-FO----frontal-operculum | 53 : Right-FRP---frontal-pole | 54 : Left-FRP---frontal-pole | 55 : Right-FuG---fusiform-gyrus |
139
+ | 56 : Left-FuG---fusiform-gyrus | 57 : Right-GRe---gyrus-rectus | 58 : Left-GRe---gyrus-rectus | 59 : Right-IOG---inferior-occipital-gyrus |
140
+ | 60 : Left-IOG---inferior-occipital-gyrus | 61 : Right-ITG---inferior-temporal-gyrus | 62 : Left-ITG---inferior-temporal-gyrus | 63 : Right-LiG---lingual-gyrus |
141
+ | 64 : Left-LiG---lingual-gyrus | 65 : Right-LOrG--lateral-orbital-gyrus | 66 : Left-LOrG--lateral-orbital-gyrus | 67 : Right-MCgG--middle-cingulate-gyrus |
142
+ | 68 : Left-MCgG--middle-cingulate-gyrus | 69 : Right-MFC---medial-frontal-cortex | 70 : Left-MFC---medial-frontal-cortex | 71 : Right-MFG---middle-frontal-gyrus |
143
+ | 72 : Left-MFG---middle-frontal-gyrus | 73 : Right-MOG---middle-occipital-gyrus | 74 : Left-MOG---middle-occipital-gyrus | 75 : Right-MOrG--medial-orbital-gyrus |
144
+ | 76 : Left-MOrG--medial-orbital-gyrus | 77 : Right-MPoG--postcentral-gyrus | 78 : Left-MPoG--postcentral-gyrus | 79 : Right-MPrG--precentral-gyrus |
145
+ | 80 : Left-MPrG--precentral-gyrus | 81 : Right-MSFG--superior-frontal-gyrus | 82 : Left-MSFG--superior-frontal-gyrus | 83 : Right-MTG---middle-temporal-gyrus |
146
+ | 84 : Left-MTG---middle-temporal-gyrus | 85 : Right-OCP---occipital-pole | 86 : Left-OCP---occipital-pole | 87 : Right-OFuG--occipital-fusiform-gyrus |
147
+ | 88 : Left-OFuG--occipital-fusiform-gyrus | 89 : Right-OpIFG-opercular-part-of-the-IFG | 90 : Left-OpIFG-opercular-part-of-the-IFG | 91 : Right-OrIFG-orbital-part-of-the-IFG |
148
+ | 92 : Left-OrIFG-orbital-part-of-the-IFG | 93 : Right-PCgG--posterior-cingulate-gyrus | 94 : Left-PCgG--posterior-cingulate-gyrus | 95 : Right-PCu---precuneus |
149
+ | 96 : Left-PCu---precuneus | 97 : Right-PHG---parahippocampal-gyrus | 98 : Left-PHG---parahippocampal-gyrus | 99 : Right-PIns--posterior-insula |
150
+ | 100 : Left-PIns--posterior-insula | 101 : Right-PO----parietal-operculum | 102 : Left-PO----parietal-operculum | 103 : Right-PoG---postcentral-gyrus |
151
+ | 104 : Left-PoG---postcentral-gyrus | 105 : Right-POrG--posterior-orbital-gyrus | 106 : Left-POrG--posterior-orbital-gyrus | 107 : Right-PP----planum-polare |
152
+ | 108 : Left-PP----planum-polare | 109 : Right-PrG---precentral-gyrus | 110 : Left-PrG---precentral-gyrus | 111 : Right-PT----planum-temporale |
153
+ | 112 : Left-PT----planum-temporale | 113 : Right-SCA---subcallosal-area | 114 : Left-SCA---subcallosal-area | 115 : Right-SFG---superior-frontal-gyrus |
154
+ | 116 : Left-SFG---superior-frontal-gyrus | 117 : Right-SMC---supplementary-motor-cortex | 118 : Left-SMC---supplementary-motor-cortex | 119 : Right-SMG---supramarginal-gyrus |
155
+ | 120 : Left-SMG---supramarginal-gyrus | 121 : Right-SOG---superior-occipital-gyrus | 122 : Left-SOG---superior-occipital-gyrus | 123 : Right-SPL---superior-parietal-lobule |
156
+ | 124 : Left-SPL---superior-parietal-lobule | 125 : Right-STG---superior-temporal-gyrus | 126 : Left-STG---superior-temporal-gyrus | 127 : Right-TMP---temporal-pole |
157
+ | 128 : Left-TMP---temporal-pole | 129 : Right-TrIFG-triangular-part-of-the-IFG | 130 : Left-TrIFG-triangular-part-of-the-IFG | 131 : Right-TTG---transverse-temporal-gyrus |
158
+ | 132 : Left-TTG---transverse-temporal-gyrus | | | |
159
+
160
+
161
+ ## Bundle Integration in MONAI Label
162
+ The inference and training pipelines can be easily used by the MONAI Label server and 3D Slicer for fast labeling of T1w MRI images in MNI space.
163
+
164
+ ![](./3DSlicer_use.png) <br>
165
+
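
One possible way to serve this bundle through MONAI Label is via its monaibundle app (an illustration, not part of the original instructions; it assumes MONAI Label is installed and the monaibundle app has been downloaded to ./apps/monaibundle, and the exact flags may differ for your setup):

```
monailabel start_server --app apps/monaibundle \
    --studies '<folder with MNI-registered T1w images>' \
    --conf models wholeBrainSeg_Large_UNEST_segmentation
```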
166
+ # Disclaimer
167
+ This is an example, not to be used for diagnostic purposes.
168
+
169
+ # References
170
+ [1] Yu, Xin, Yinchi Zhou, Yucheng Tang et al. Characterizing Renal Structures with 3D Block Aggregate Transformers. arXiv preprint arXiv:2203.02430 (2022). https://arxiv.org/pdf/2203.02430.pdf
171
+
172
+ [2] Zizhao Zhang et al. Nested Hierarchical Transformer: Towards Accurate, Data-Efficient and Interpretable Visual Understanding. AAAI Conference on Artificial Intelligence (AAAI) 2022
173
+
174
+ [3] Huo, Yuankai, et al. 3D whole brain segmentation using spatially localized atlas network tiles. NeuroImage 194 (2019): 105-119.
175
+
176
+ # License
177
+ Copyright (c) MONAI Consortium
178
+
179
+ Licensed under the Apache License, Version 2.0 (the "License");
180
+ you may not use this file except in compliance with the License.
181
+ You may obtain a copy of the License at
182
+
183
+ http://www.apache.org/licenses/LICENSE-2.0
184
+
185
+ Unless required by applicable law or agreed to in writing, software
186
+ distributed under the License is distributed on an "AS IS" BASIS,
187
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
188
+ See the License for the specific language governing permissions and
189
+ limitations under the License.
docs/demo.png ADDED

Git LFS Details

  • SHA256: 92aae2d9b2901de18b445d6e6efdf48b6c3d8bb5e66ee55c3fde152e13f952f7
  • Pointer size: 132 Bytes
  • Size of remote file: 1.21 MB
docs/training.png ADDED
docs/unest.png ADDED
docs/wholebrain.png ADDED

Git LFS Details

  • SHA256: bb2e981296ea8f1ae12ab4e7cda15c3694ea78d151287879bdfd257f1ca7c587
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
models/model.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79a52ccd77bc35d05410f39788a1b063af3eb3b809b42241335c18aed27ec422
3
+ size 348901503
scripts/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ # Copyright (c) MONAI Consortium
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
scripts/networks/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ # Copyright (c) MONAI Consortium
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
scripts/networks/nest/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env python3
2
+ from .utils import (
3
+ Conv3dSame,
4
+ DropPath,
5
+ Linear,
6
+ Mlp,
7
+ _assert,
8
+ conv3d_same,
9
+ create_conv3d,
10
+ create_pool3d,
11
+ get_padding,
12
+ get_same_padding,
13
+ pad_same,
14
+ to_ntuple,
15
+ trunc_normal_,
16
+ )
scripts/networks/nest/utils.py ADDED
@@ -0,0 +1,481 @@
1
+ #!/usr/bin/env python3
2
+
3
+
4
+ import collections.abc
5
+ import math
6
+ import warnings
7
+ from itertools import repeat
8
+ from typing import List, Optional, Tuple
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+
14
+ try:
15
+ from torch import _assert
16
+ except ImportError:
17
+
18
+ def _assert(condition: bool, message: str):
19
+ assert condition, message
20
+
21
+
22
+ def drop_block_2d(
23
+ x,
24
+ drop_prob: float = 0.1,
25
+ block_size: int = 7,
26
+ gamma_scale: float = 1.0,
27
+ with_noise: bool = False,
28
+ inplace: bool = False,
29
+ batchwise: bool = False,
30
+ ):
31
+ """DropBlock. See https://arxiv.org/pdf/1810.12890.pdf
32
+
33
+ DropBlock with an experimental gaussian noise option. This layer has been tested on a few training
34
+ runs with success, but needs further validation and possibly optimization for lower runtime impact.
35
+ """
36
+ b, c, h, w = x.shape
37
+ total_size = w * h
38
+ clipped_block_size = min(block_size, min(w, h))
39
+ # seed_drop_rate, the gamma parameter
40
+ gamma = gamma_scale * drop_prob * total_size / clipped_block_size**2 / ((w - block_size + 1) * (h - block_size + 1))
41
+
42
+ # Forces the block to be inside the feature map.
43
+ w_i, h_i = torch.meshgrid(torch.arange(w).to(x.device), torch.arange(h).to(x.device))
44
+ valid_block = ((w_i >= clipped_block_size // 2) & (w_i < w - (clipped_block_size - 1) // 2)) & (
45
+ (h_i >= clipped_block_size // 2) & (h_i < h - (clipped_block_size - 1) // 2)
46
+ )
47
+ valid_block = torch.reshape(valid_block, (1, 1, h, w)).to(dtype=x.dtype)
48
+
49
+ if batchwise:
50
+ # one mask for whole batch, quite a bit faster
51
+ uniform_noise = torch.rand((1, c, h, w), dtype=x.dtype, device=x.device)
52
+ else:
53
+ uniform_noise = torch.rand_like(x)
54
+ block_mask = ((2 - gamma - valid_block + uniform_noise) >= 1).to(dtype=x.dtype)
55
+ block_mask = -F.max_pool2d(
56
+ -block_mask, kernel_size=clipped_block_size, stride=1, padding=clipped_block_size // 2 # block_size,
57
+ )
58
+
59
+ if with_noise:
60
+ normal_noise = torch.randn((1, c, h, w), dtype=x.dtype, device=x.device) if batchwise else torch.randn_like(x)
61
+ if inplace:
62
+ x.mul_(block_mask).add_(normal_noise * (1 - block_mask))
63
+ else:
64
+ x = x * block_mask + normal_noise * (1 - block_mask)
65
+ else:
66
+ normalize_scale = (block_mask.numel() / block_mask.to(dtype=torch.float32).sum().add(1e-7)).to(x.dtype)
67
+ if inplace:
68
+ x.mul_(block_mask * normalize_scale)
69
+ else:
70
+ x = x * block_mask * normalize_scale
71
+ return x
72
+
73
+
74
+ def drop_block_fast_2d(
75
+ x: torch.Tensor,
76
+ drop_prob: float = 0.1,
77
+ block_size: int = 7,
78
+ gamma_scale: float = 1.0,
79
+ with_noise: bool = False,
80
+ inplace: bool = False,
81
+ ):
82
+ """DropBlock. See https://arxiv.org/pdf/1810.12890.pdf
83
+
84
+ DropBlock with an experimental gaussian noise option. Simplied from above without concern for valid
85
+ block mask at edges.
86
+ """
87
+ b, c, h, w = x.shape
88
+ total_size = w * h
89
+ clipped_block_size = min(block_size, min(w, h))
90
+ gamma = gamma_scale * drop_prob * total_size / clipped_block_size**2 / ((w - block_size + 1) * (h - block_size + 1))
91
+
92
+ block_mask = torch.empty_like(x).bernoulli_(gamma)
93
+ block_mask = F.max_pool2d(
94
+ block_mask.to(x.dtype), kernel_size=clipped_block_size, stride=1, padding=clipped_block_size // 2
95
+ )
96
+
97
+ if with_noise:
98
+ normal_noise = torch.empty_like(x).normal_()
99
+ if inplace:
100
+ x.mul_(1.0 - block_mask).add_(normal_noise * block_mask)
101
+ else:
102
+ x = x * (1.0 - block_mask) + normal_noise * block_mask
103
+ else:
104
+ block_mask = 1 - block_mask
105
+ normalize_scale = (block_mask.numel() / block_mask.to(dtype=torch.float32).sum().add(1e-6)).to(dtype=x.dtype)
106
+ if inplace:
107
+ x.mul_(block_mask * normalize_scale)
108
+ else:
109
+ x = x * block_mask * normalize_scale
110
+ return x
111
+
112
+
113
+ class DropBlock2d(nn.Module):
114
+ """DropBlock. See https://arxiv.org/pdf/1810.12890.pdf"""
115
+
116
+ def __init__(
117
+ self, drop_prob=0.1, block_size=7, gamma_scale=1.0, with_noise=False, inplace=False, batchwise=False, fast=True
118
+ ):
119
+ super(DropBlock2d, self).__init__()
120
+ self.drop_prob = drop_prob
121
+ self.gamma_scale = gamma_scale
122
+ self.block_size = block_size
123
+ self.with_noise = with_noise
124
+ self.inplace = inplace
125
+ self.batchwise = batchwise
126
+ self.fast = fast # FIXME finish comparisons of fast vs not
127
+
128
+ def forward(self, x):
129
+ if not self.training or not self.drop_prob:
130
+ return x
131
+ if self.fast:
132
+ return drop_block_fast_2d(
133
+ x, self.drop_prob, self.block_size, self.gamma_scale, self.with_noise, self.inplace
134
+ )
135
+ else:
136
+ return drop_block_2d(
137
+ x, self.drop_prob, self.block_size, self.gamma_scale, self.with_noise, self.inplace, self.batchwise
138
+ )
139
+
140
+
141
+ def drop_path(x, drop_prob: float = 0.0, training: bool = False, scale_by_keep: bool = True):
142
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
143
+
144
+ This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
145
+ the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
146
+ See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
147
+ changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
148
+ 'survival rate' as the argument.
149
+
150
+ """
151
+ if drop_prob == 0.0 or not training:
152
+ return x
153
+ keep_prob = 1 - drop_prob
154
+ shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
155
+ random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
156
+ if keep_prob > 0.0 and scale_by_keep:
157
+ random_tensor.div_(keep_prob)
158
+ return x * random_tensor
159
+
160
+
161
+ class DropPath(nn.Module):
162
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
163
+
164
+ def __init__(self, drop_prob=None, scale_by_keep=True):
165
+ super(DropPath, self).__init__()
166
+ self.drop_prob = drop_prob
167
+ self.scale_by_keep = scale_by_keep
168
+
169
+ def forward(self, x):
170
+ return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)
171
+
172
+
173
+ def create_conv3d(in_channels, out_channels, kernel_size, **kwargs):
174
+ """Select a 2d convolution implementation based on arguments
175
+ Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv3d, or CondConv2d.
176
+
177
+ Used extensively by EfficientNet, MobileNetv3 and related networks.
178
+ """
179
+
180
+ depthwise = kwargs.pop("depthwise", False)
181
+ # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0
182
+ groups = in_channels if depthwise else kwargs.pop("groups", 1)
183
+
184
+ m = create_conv3d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs)
185
+ return m
186
+
187
+
188
+ def conv3d_same(
189
+ x,
190
+ weight: torch.Tensor,
191
+ bias: Optional[torch.Tensor] = None,
192
+ stride: Tuple[int, int, int] = (1, 1, 1),
193
+ padding: Tuple[int, int, int] = (0, 0, 0),
194
+ dilation: Tuple[int, int, int] = (1, 1, 1),
195
+ groups: int = 1,
196
+ ):
197
+ x = pad_same(x, weight.shape[-3:], stride, dilation)
198
+ return F.conv3d(x, weight, bias, stride, (0, 0, 0), dilation, groups)
199
+
200
+
201
+ class Conv3dSame(nn.Conv3d):
202
+ """Tensorflow like 'SAME' convolution wrapper for 3D convolutions"""
203
+
204
+ def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
205
+ super(Conv3dSame, self).__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
206
+
207
+ def forward(self, x):
208
+ return conv3d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
209
+
210
+
211
+ def create_conv3d_pad(in_chs, out_chs, kernel_size, **kwargs):
212
+ padding = kwargs.pop("padding", "")
213
+ kwargs.setdefault("bias", False)
214
+ padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs)
215
+ if is_dynamic:
216
+ return Conv3dSame(in_chs, out_chs, kernel_size, **kwargs)
217
+ else:
218
+ return nn.Conv3d(in_chs, out_chs, kernel_size, padding=padding, **kwargs)
219
+
220
+
221
+ # Calculate symmetric padding for a convolution
222
+ def get_padding(kernel_size: int, stride: int = 1, dilation: int = 1, **_) -> int:
223
+ padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2
224
+ return padding
225
+
226
+
227
+ # Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution
228
+ def get_same_padding(x: int, k: int, s: int, d: int):
229
+ return max((math.ceil(x / s) - 1) * s + (k - 1) * d + 1 - x, 0)
230
+
231
+
232
+ # Can SAME padding for given args be done statically?
233
+ def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, **_):
234
+ return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0
235
+
236
+
237
+ # Dynamically pad input x with 'SAME' padding for conv with specified args
238
+ def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1, 1), value: float = 0):
239
+ id, ih, iw = x.size()[-3:]
240
+ pad_d, pad_h, pad_w = (
241
+ get_same_padding(id, k[0], s[0], d[0]),
242
+ get_same_padding(ih, k[1], s[1], d[1]),
243
+ get_same_padding(iw, k[2], s[2], d[2]),
244
+ )
245
+ if pad_d > 0 or pad_h > 0 or pad_w > 0:
246
+ x = F.pad(
247
+ x,
248
+ [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2, pad_d // 2, pad_d - pad_d // 2],  # F.pad pads the last (W) dim first
249
+ value=value,
250
+ )
251
+ return x
252
+
253
+
254
+ def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bool]:
255
+ dynamic = False
256
+ if isinstance(padding, str):
257
+ # for any string padding, the padding will be calculated for you, one of three ways
258
+ padding = padding.lower()
259
+ if padding == "same":
260
+ # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact
261
+ if is_static_pad(kernel_size, **kwargs):
262
+ # static case, no extra overhead
263
+ padding = get_padding(kernel_size, **kwargs)
264
+ else:
265
+ # dynamic 'SAME' padding, has runtime/GPU memory overhead
266
+ padding = 0
267
+ dynamic = True
268
+ elif padding == "valid":
269
+ # 'VALID' padding, same as padding=0
270
+ padding = 0
271
+ else:
272
+ # Default to PyTorch style 'same'-ish symmetric padding
273
+ padding = get_padding(kernel_size, **kwargs)
274
+ return padding, dynamic
275
+
276
+
277
+ # From PyTorch internals
278
+ def _ntuple(n):
279
+ def parse(x):
280
+ if isinstance(x, collections.abc.Iterable):
281
+ return x
282
+ return tuple(repeat(x, n))
283
+
284
+ return parse
285
+
286
+
287
+ to_1tuple = _ntuple(1)
288
+ to_2tuple = _ntuple(2)
289
+ to_3tuple = _ntuple(3)
290
+ to_4tuple = _ntuple(4)
291
+ to_ntuple = _ntuple
292
+
293
+
294
+ def make_divisible(v, divisor=8, min_value=None, round_limit=0.9):
295
+ min_value = min_value or divisor
296
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
297
+ # Make sure that round down does not go down by more than 10%.
298
+ if new_v < round_limit * v:
299
+ new_v += divisor
300
+ return new_v
301
+
302
+
303
+ class Linear(nn.Linear):
304
+ r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`
305
+
306
+ Wraps torch.nn.Linear to support AMP + torchscript usage by manually casting
307
+ weight & bias to input.dtype to work around an issue w/ torch.addmm in this use case.
308
+ """
309
+
310
+ def forward(self, input: torch.Tensor) -> torch.Tensor:
311
+ if torch.jit.is_scripting():
312
+ bias = self.bias.to(dtype=input.dtype) if self.bias is not None else None
313
+ return F.linear(input, self.weight.to(dtype=input.dtype), bias=bias)
314
+ else:
315
+ return F.linear(input, self.weight, self.bias)
316
+
317
+
318
+ class Mlp(nn.Module):
319
+ """MLP as used in Vision Transformer, MLP-Mixer and related networks"""
320
+
321
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0):
322
+ super().__init__()
323
+ out_features = out_features or in_features
324
+ hidden_features = hidden_features or in_features
325
+ drop_probs = to_2tuple(drop)
326
+
327
+ self.fc1 = nn.Linear(in_features, hidden_features)
328
+ self.act = act_layer()
329
+ self.drop1 = nn.Dropout(drop_probs[0])
330
+ self.fc2 = nn.Linear(hidden_features, out_features)
331
+ self.drop2 = nn.Dropout(drop_probs[1])
332
+
333
+ def forward(self, x):
334
+ x = self.fc1(x)
335
+ x = self.act(x)
336
+ x = self.drop1(x)
337
+ x = self.fc2(x)
338
+ x = self.drop2(x)
339
+ return x
340
+
341
+
342
+ def avg_pool3d_same(
343
+ x,
344
+ kernel_size: List[int],
345
+ stride: List[int],
346
+ padding: List[int] = (0, 0, 0),
347
+ ceil_mode: bool = False,
348
+ count_include_pad: bool = True,
349
+ ):
350
+ # FIXME how to deal with count_include_pad vs not for external padding?
351
+ x = pad_same(x, kernel_size, stride)
352
+ return F.avg_pool3d(x, kernel_size, stride, (0, 0, 0), ceil_mode, count_include_pad)
353
+
354
+
355
+ class AvgPool3dSame(nn.AvgPool3d):
356
+ """Tensorflow like 'SAME' wrapper for 3D average pooling"""
357
+
358
+ def __init__(self, kernel_size: int, stride=None, padding=0, ceil_mode=False, count_include_pad=True):
359
+ kernel_size = to_3tuple(kernel_size)
360
+ stride = to_3tuple(stride)
361
+ super(AvgPool3dSame, self).__init__(kernel_size, stride, (0, 0, 0), ceil_mode, count_include_pad)
362
+
363
+ def forward(self, x):
364
+ x = pad_same(x, self.kernel_size, self.stride)
365
+ return F.avg_pool3d(x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad)
366
+
367
+
368
+ def max_pool3d_same(
369
+ x,
370
+ kernel_size: List[int],
371
+ stride: List[int],
372
+ padding: List[int] = (0, 0, 0),
373
+ dilation: List[int] = (1, 1, 1),
374
+ ceil_mode: bool = False,
375
+ ):
376
+ x = pad_same(x, kernel_size, stride, value=-float("inf"))
377
+ return F.max_pool3d(x, kernel_size, stride, (0, 0, 0), dilation, ceil_mode)
378
+
379
+
380
+ class MaxPool3dSame(nn.MaxPool3d):
381
+ """Tensorflow like 'SAME' wrapper for 3D max pooling"""
382
+
383
+ def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False):
384
+ kernel_size = to_3tuple(kernel_size)
385
+ stride = to_3tuple(stride)
386
+ dilation = to_3tuple(dilation)
387
+ super(MaxPool3dSame, self).__init__(kernel_size, stride, (0, 0, 0), dilation, ceil_mode)
388
+
389
+ def forward(self, x):
390
+ x = pad_same(x, self.kernel_size, self.stride, value=-float("inf"))
391
+ return F.max_pool3d(x, self.kernel_size, self.stride, (0, 0, 0), self.dilation, self.ceil_mode)
392
+
393
+
394
+ def create_pool3d(pool_type, kernel_size, stride=None, **kwargs):
395
+ stride = stride or kernel_size
396
+ padding = kwargs.pop("padding", "")
397
+ padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, **kwargs)
398
+ if is_dynamic:
399
+ if pool_type == "avg":
400
+ return AvgPool3dSame(kernel_size, stride=stride, **kwargs)
401
+ elif pool_type == "max":
402
+ return MaxPool3dSame(kernel_size, stride=stride, **kwargs)
403
+ else:
404
+ raise AssertionError()
405
+
406
+ # assert False, f"Unsupported pool type {pool_type}"
407
+ else:
408
+ if pool_type == "avg":
409
+ return nn.AvgPool3d(kernel_size, stride=stride, padding=padding, **kwargs)
410
+ elif pool_type == "max":
411
+ return nn.MaxPool3d(kernel_size, stride=stride, padding=padding, **kwargs)
412
+ else:
413
+ raise AssertionError()
414
+
415
+ # assert False, f"Unsupported pool type {pool_type}"
416
+
417
+
418
+ def _float_to_int(x: float) -> int:
419
+ """
420
+ Symbolic tracing helper to substitute for inbuilt `int`.
421
+ Hint: Inbuilt `int` can't accept an argument of type `Proxy`
422
+ """
423
+ return int(x)
424
+
425
+
426
+ def _no_grad_trunc_normal_(tensor, mean, std, a, b):
427
+ # Cut & paste from PyTorch official master until it's in a few official releases - RW
428
+ # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
429
+ def norm_cdf(x):
430
+ # Computes standard normal cumulative distribution function
431
+ return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0
432
+
433
+ if (mean < a - 2 * std) or (mean > b + 2 * std):
434
+ warnings.warn(
435
+ "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
436
+ "The distribution of values may be incorrect.",
437
+ stacklevel=2,
438
+ )
439
+
440
+ with torch.no_grad():
441
+ # Values are generated by using a truncated uniform distribution and
442
+ # then using the inverse CDF for the normal distribution.
443
+ # Get upper and lower cdf values
444
+ l = norm_cdf((a - mean) / std)
445
+ u = norm_cdf((b - mean) / std)
446
+
447
+ # Uniformly fill tensor with values from [l, u], then translate to
448
+ # [2l-1, 2u-1].
449
+ tensor.uniform_(2 * l - 1, 2 * u - 1)
450
+
451
+ # Use inverse cdf transform for normal distribution to get truncated
452
+ # standard normal
453
+ tensor.erfinv_()
454
+
455
+ # Transform to proper mean, std
456
+ tensor.mul_(std * math.sqrt(2.0))
457
+ tensor.add_(mean)
458
+
459
+ # Clamp to ensure it's in the proper range
460
+ tensor.clamp_(min=a, max=b)
461
+ return tensor
462
+
463
+
464
+ def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0):
465
+ r"""Fills the input Tensor with values drawn from a truncated
466
+ normal distribution. The values are effectively drawn from the
467
+ normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
468
+ with values outside :math:`[a, b]` redrawn until they are within
469
+ the bounds. The method used for generating the random values works
470
+ best when :math:`a \leq \text{mean} \leq b`.
471
+ Args:
472
+ tensor: an n-dimensional `torch.Tensor`
473
+ mean: the mean of the normal distribution
474
+ std: the standard deviation of the normal distribution
475
+ a: the minimum cutoff value
476
+ b: the maximum cutoff value
477
+ Examples:
478
+ >>> w = torch.empty(3, 5)
479
+ >>> nn.init.trunc_normal_(w)
480
+ """
481
+ return _no_grad_trunc_normal_(tensor, mean, std, a, b)
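The helpers above (DropPath, the TF-'SAME' conv/pool factories, and trunc_normal_) are 3D adaptations of timm layer utilities. A minimal usage sketch, assuming this helper module is importable as scripts.networks.nest (the name used by the relative import in nest_transformer_3D.py below); shapes are illustrative only:

import torch
import torch.nn as nn

from scripts.networks.nest import DropPath, create_conv3d, create_pool3d, trunc_normal_

x = torch.randn(2, 16, 24, 24, 24)  # (B, C, D, H, W), arbitrary example shape

# padding="" selects PyTorch-style symmetric padding, so stride-1 convs keep the spatial size.
conv = create_conv3d(16, 32, kernel_size=3, padding="")
pool = create_pool3d("max", kernel_size=3, stride=2, padding="")
y = pool(conv(x))  # (2, 32, 12, 12, 12)

# Stochastic depth: identity in eval mode, randomly zeroes whole samples during training.
dp = DropPath(drop_prob=0.1)
dp.eval()
assert torch.equal(dp(y), y)

# Truncated-normal init, as used for the positional embeddings in the transformer below.
pos = nn.Parameter(torch.zeros(1, 64, 27, 128))
trunc_normal_(pos, std=0.02, a=-2, b=2)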
scripts/networks/nest_transformer_3D.py ADDED
@@ -0,0 +1,489 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # =========================================================================
4
+ # Adapted from https://github.com/google-research/nested-transformer.
5
+ # which has the following license...
6
+ # https://github.com/pytorch/vision/blob/main/LICENSE
7
+ #
8
+ # BSD 3-Clause License
9
+
10
+
11
+ # Redistribution and use in source and binary forms, with or without
12
+ # modification, are permitted provided that the following conditions are met:
13
+
14
+ # * Redistributions of source code must retain the above copyright notice, this
15
+ # list of conditions and the following disclaimer.
16
+
17
+ # * Redistributions in binary form must reproduce the above copyright notice,
18
+ # this list of conditions and the following disclaimer in the documentation
19
+ # and/or other materials provided with the distribution.
20
+
21
+ # * Neither the name of the copyright holder nor the names of its
22
+ # contributors may be used to endorse or promote products derived from
23
+ # this software without specific prior written permission.
24
+
25
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
29
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
+
36
+ """ Nested Transformer (NesT) in PyTorch
37
+ A PyTorch implementation of Aggregating Nested Transformers as described in:
38
+ 'Aggregating Nested Transformers'
39
+ - https://arxiv.org/abs/2105.12723
40
+ The official Jax code is released and available at https://github.com/google-research/nested-transformer.
41
+ The weights have been converted with convert/convert_nest_flax.py
42
+ Acknowledgments:
43
+ * The paper authors for sharing their research, code, and model weights
44
+ * Ross Wightman's existing code off which I based this
45
+ Copyright 2021 Alexander Soare
46
+
47
+ """
48
+
49
+ import collections.abc
50
+ import logging
51
+ import math
52
+ from functools import partial
53
+ from typing import Callable, Sequence
54
+
55
+ import torch
56
+ import torch.nn.functional as F
57
+ from torch import nn
58
+
59
+ from .nest import DropPath, Mlp, _assert, create_conv3d, create_pool3d, to_ntuple, trunc_normal_
60
+ from .patchEmbed3D import PatchEmbed3D
61
+
62
+ _logger = logging.getLogger(__name__)
63
+
64
+
65
+ class Attention(nn.Module):
66
+ """
67
+ This is much like `.vision_transformer.Attention` but uses *localised* self attention by accepting an input with
68
+ an extra "image block" dim
69
+ """
70
+
71
+ def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0.0, proj_drop=0.0):
72
+ super().__init__()
73
+ self.num_heads = num_heads
74
+ head_dim = dim // num_heads
75
+ self.scale = head_dim**-0.5
76
+
77
+ self.qkv = nn.Linear(dim, 3 * dim, bias=qkv_bias)
78
+ self.attn_drop = nn.Dropout(attn_drop)
79
+ self.proj = nn.Linear(dim, dim)
80
+ self.proj_drop = nn.Dropout(proj_drop)
81
+
82
+ def forward(self, x):
83
+ """
84
+ x is shape: B (batch_size), T (image blocks), N (seq length per image block), C (embed dim)
85
+ """
86
+ b, t, n, c = x.shape
87
+ # result of next line is (qkv, B, num (H)eads, T, N, (C')hannels per head)
88
+ qkv = self.qkv(x).reshape(b, t, n, 3, self.num_heads, c // self.num_heads).permute(3, 0, 4, 1, 2, 5)
89
+ q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple)
90
+
91
+ attn = (q @ k.transpose(-2, -1)) * self.scale # (B, H, T, N, N)
92
+ attn = attn.softmax(dim=-1)
93
+ attn = self.attn_drop(attn)
94
+
95
+ x = (attn @ v).permute(0, 2, 3, 4, 1).reshape(b, t, n, c)
96
+ x = self.proj(x)
97
+ x = self.proj_drop(x)
98
+ return x # (B, T, N, C)
99
+
100
+
101
+ class TransformerLayer(nn.Module):
102
+ """
103
+ This is much like `.vision_transformer.Block` but:
104
+ - Called TransformerLayer here to allow for "block" as defined in the paper ("non-overlapping image blocks")
105
+ - Uses modified Attention layer that handles the "block" dimension
106
+ """
107
+
108
+ def __init__(
109
+ self,
110
+ dim,
111
+ num_heads,
112
+ mlp_ratio=4.0,
113
+ qkv_bias=False,
114
+ drop=0.0,
115
+ attn_drop=0.0,
116
+ drop_path=0.0,
117
+ act_layer=nn.GELU,
118
+ norm_layer=nn.LayerNorm,
119
+ ):
120
+ super().__init__()
121
+ self.norm1 = norm_layer(dim)
122
+ self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
123
+ self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
124
+ self.norm2 = norm_layer(dim)
125
+ mlp_hidden_dim = int(dim * mlp_ratio)
126
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
127
+
128
+ def forward(self, x):
129
+ y = self.norm1(x)
130
+ x = x + self.drop_path(self.attn(y))
131
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
132
+ return x
133
+
134
+
135
+ class ConvPool(nn.Module):
136
+ def __init__(self, in_channels, out_channels, norm_layer, pad_type=""):
137
+ super().__init__()
138
+ self.conv = create_conv3d(in_channels, out_channels, kernel_size=3, padding=pad_type, bias=True)
139
+ self.norm = norm_layer(out_channels)
140
+ self.pool = create_pool3d("max", kernel_size=3, stride=2, padding=pad_type)
141
+
142
+ def forward(self, x):
143
+ """
144
+ x is expected to have shape (B, C, D, H, W)
145
+ """
146
+ _assert(x.shape[-3] % 2 == 0, "BlockAggregation requires even input spatial dims")
147
+ _assert(x.shape[-2] % 2 == 0, "BlockAggregation requires even input spatial dims")
148
+ _assert(x.shape[-1] % 2 == 0, "BlockAggregation requires even input spatial dims")
149
+
150
+ # print('In ConvPool x : {}'.format(x.shape))
151
+ x = self.conv(x)
152
+ # Layer norm done over channel dim only
153
+ x = self.norm(x.permute(0, 2, 3, 4, 1)).permute(0, 4, 1, 2, 3)
154
+ x = self.pool(x)
155
+ return x # (B, C, D//2, H//2, W//2)
156
+
157
+
158
+ def blockify(x, block_size: int):
159
+ """image to blocks
160
+ Args:
161
+ x (Tensor): with shape (B, D, H, W, C)
162
+ block_size (int): edge length of a single square block in units of D, H, W
163
+ """
164
+ b, d, h, w, c = x.shape
165
+ _assert(d % block_size == 0, "`block_size` must divide input depth evenly")
166
+ _assert(h % block_size == 0, "`block_size` must divide input height evenly")
167
+ _assert(w % block_size == 0, "`block_size` must divide input width evenly")
168
+ grid_depth = d // block_size
169
+ grid_height = h // block_size
170
+ grid_width = w // block_size
171
+ x = x.reshape(b, grid_depth, block_size, grid_height, block_size, grid_width, block_size, c)
172
+
173
+ x = x.permute(0, 1, 3, 5, 2, 4, 6, 7).reshape(
174
+ b, grid_depth * grid_height * grid_width, -1, c
175
+ ) # shape [2, 512, 27, 128]
176
+
177
+ return x # (B, T, N, C)
178
+
179
+
180
+ # @register_notrace_function # reason: int receives Proxy
181
+ def deblockify(x, block_size: int):
182
+ """blocks to image
183
+ Args:
184
+ x (Tensor): with shape (B, T, N, C) where T is number of blocks and N is sequence size per block
185
+ block_size (int): edge length of a single square block in units of desired D, H, W
186
+ """
187
+ b, t, _, c = x.shape
188
+ grid_size = round(math.pow(t, 1 / 3))
189
+ depth = height = width = grid_size * block_size
190
+ x = x.reshape(b, grid_size, grid_size, grid_size, block_size, block_size, block_size, c)
191
+
192
+ x = x.permute(0, 1, 4, 2, 5, 3, 6, 7).reshape(b, depth, height, width, c)
193
+
194
+ return x # (B, D, H, W, C)
195
+
196
+
197
+ class NestLevel(nn.Module):
198
+ """Single hierarchical level of a Nested Transformer"""
199
+
200
+ def __init__(
201
+ self,
202
+ num_blocks,
203
+ block_size,
204
+ seq_length,
205
+ num_heads,
206
+ depth,
207
+ embed_dim,
208
+ prev_embed_dim=None,
209
+ mlp_ratio=4.0,
210
+ qkv_bias=True,
211
+ drop_rate=0.0,
212
+ attn_drop_rate=0.0,
213
+ drop_path_rates: Sequence[float] = (),
214
+ norm_layer=None,
215
+ act_layer=None,
216
+ pad_type="",
217
+ ):
218
+ super().__init__()
219
+ self.block_size = block_size
220
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_blocks, seq_length, embed_dim))
221
+
222
+ if prev_embed_dim is not None:
223
+ self.pool = ConvPool(prev_embed_dim, embed_dim, norm_layer=norm_layer, pad_type=pad_type)
224
+ else:
225
+ self.pool = nn.Identity()
226
+
227
+ # Transformer encoder
228
+ if len(drop_path_rates):
229
+ assert len(drop_path_rates) == depth, "Must provide as many drop path rates as there are transformer layers"
230
+ self.transformer_encoder = nn.Sequential(
231
+ *[
232
+ TransformerLayer(
233
+ dim=embed_dim,
234
+ num_heads=num_heads,
235
+ mlp_ratio=mlp_ratio,
236
+ qkv_bias=qkv_bias,
237
+ drop=drop_rate,
238
+ attn_drop=attn_drop_rate,
239
+ drop_path=drop_path_rates[i],
240
+ norm_layer=norm_layer,
241
+ act_layer=act_layer,
242
+ )
243
+ for i in range(depth)
244
+ ]
245
+ )
246
+
247
+ def forward(self, x):
248
+ """
249
+ expects x as (B, C, D, H, W)
250
+ """
251
+ x = self.pool(x)
252
+ x = x.permute(0, 2, 3, 4, 1)  # (B, D', H', W', C), switch to channels last for transformer
253
+
254
+ x = blockify(x, self.block_size) # (B, T, N, C')
255
+ x = x + self.pos_embed
256
+
257
+ x = self.transformer_encoder(x)  # (B, T, N, C')
258
+
259
+ x = deblockify(x, self.block_size) # (B, D', H', W', C') [2, 24, 24, 24, 128]
260
+ # Channel-first for block aggregation, and generally to replicate convnet feature map at each stage
261
+ return x.permute(0, 4, 1, 2, 3) # (B, C, D', H', W')
262
+
263
+
264
+ class NestTransformer3D(nn.Module):
265
+ """Nested Transformer (NesT)
266
+ A PyTorch impl of : `Aggregating Nested Transformers`
267
+ - https://arxiv.org/abs/2105.12723
268
+ """
269
+
270
+ def __init__(
271
+ self,
272
+ img_size=96,
273
+ in_chans=1,
274
+ patch_size=2,
275
+ num_levels=3,
276
+ embed_dims=(128, 256, 512),
277
+ num_heads=(4, 8, 16),
278
+ depths=(2, 2, 20),
279
+ num_classes=1000,
280
+ mlp_ratio=4.0,
281
+ qkv_bias=True,
282
+ drop_rate=0.0,
283
+ attn_drop_rate=0.0,
284
+ drop_path_rate=0.5,
285
+ norm_layer=None,
286
+ act_layer=None,
287
+ pad_type="",
288
+ weight_init="",
289
+ global_pool="avg",
290
+ ):
291
+ """
292
+ Args:
293
+ img_size (int, tuple): input image size
294
+ in_chans (int): number of input channels
295
+ patch_size (int): patch size
296
+ num_levels (int): number of block hierarchies (T_d in the paper)
297
+ embed_dims (int, tuple): embedding dimensions of each level
298
+ num_heads (int, tuple): number of attention heads for each level
299
+ depths (int, tuple): number of transformer layers for each level
300
+ num_classes (int): number of classes for classification head
301
+ mlp_ratio (int): ratio of mlp hidden dim to embedding dim for MLP of transformer layers
302
+ qkv_bias (bool): enable bias for qkv if True
303
+ drop_rate (float): dropout rate for MLP of transformer layers, MSA final projection layer, and classifier
304
+ attn_drop_rate (float): attention dropout rate
305
+ drop_path_rate (float): stochastic depth rate
306
+ norm_layer: (nn.Module): normalization layer for transformer layers
307
+ act_layer: (nn.Module): activation layer in MLP of transformer layers
308
+ pad_type: str: Type of padding to use '' for PyTorch symmetric, 'same' for TF SAME
309
+ weight_init: (str): weight init scheme
310
+ global_pool: (str): type of pooling operation to apply to final feature map
311
+ Notes:
312
+ - Default values follow NesT-B from the original Jax code.
313
+ - `embed_dims`, `num_heads`, `depths` should be ints or tuples with length `num_levels`.
314
+ - For those following the paper, Table A1 may have errors!
315
+ - https://github.com/google-research/nested-transformer/issues/2
316
+ """
317
+ super().__init__()
318
+
319
+ for param_name in ["embed_dims", "num_heads", "depths"]:
320
+ param_value = locals()[param_name]
321
+ if isinstance(param_value, collections.abc.Sequence):
322
+ assert len(param_value) == num_levels, f"Require `len({param_name}) == num_levels`"
323
+
324
+ embed_dims = to_ntuple(num_levels)(embed_dims)
325
+ num_heads = to_ntuple(num_levels)(num_heads)
326
+ depths = to_ntuple(num_levels)(depths)
327
+ self.num_classes = num_classes
328
+ self.num_features = embed_dims[-1]
329
+ self.feature_info = []
330
+ norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
331
+ act_layer = act_layer or nn.GELU
332
+ self.drop_rate = drop_rate
333
+ self.num_levels = num_levels
334
+ if isinstance(img_size, collections.abc.Sequence):
335
+ assert img_size[0] == img_size[1] == img_size[2], "Model only handles cubic inputs"
336
+ img_size = img_size[0]
337
+ assert img_size % patch_size == 0, "`patch_size` must divide `img_size` evenly"
338
+ self.patch_size = patch_size
339
+
340
+ # Number of blocks at each level
341
+ self.num_blocks = (8 ** torch.arange(num_levels)).flip(0).tolist()
342
+ assert (img_size // patch_size) % round(
343
+ math.pow(self.num_blocks[0], 1 / 3)
344
+ ) == 0, "First level blocks don't fit evenly. Check `img_size`, `patch_size`, and `num_levels`"
345
+
346
+ # Block edge size in units of patches
347
+ # Hint: (img_size // patch_size) gives the number of patches along each edge of the volume.
348
+ # The cube root of self.num_blocks[0] is the number of blocks along each edge.
349
+ self.block_size = int((img_size // patch_size) // round(math.pow(self.num_blocks[0], 1 / 3)))
350
+
351
+ # Patch embedding
352
+ self.patch_embed = PatchEmbed3D(
353
+ img_size=[img_size, img_size, img_size],
354
+ patch_size=[patch_size, patch_size, patch_size],
355
+ in_chans=in_chans,
356
+ embed_dim=embed_dims[0],
357
+ )
358
+ self.num_patches = self.patch_embed.num_patches
359
+ self.seq_length = self.num_patches // self.num_blocks[0]
360
+ # Build up each hierarchical level
361
+ levels = []
362
+
363
+ dp_rates = [x.tolist() for x in torch.linspace(0, drop_path_rate, sum(depths)).split(depths)]
364
+ prev_dim = None
365
+ curr_stride = 4
366
+ for i in range(len(self.num_blocks)):
367
+ dim = embed_dims[i]
368
+ levels.append(
369
+ NestLevel(
370
+ self.num_blocks[i],
371
+ self.block_size,
372
+ self.seq_length,
373
+ num_heads[i],
374
+ depths[i],
375
+ dim,
376
+ prev_dim,
377
+ mlp_ratio,
378
+ qkv_bias,
379
+ drop_rate,
380
+ attn_drop_rate,
381
+ dp_rates[i],
382
+ norm_layer,
383
+ act_layer,
384
+ pad_type=pad_type,
385
+ )
386
+ )
387
+ self.feature_info += [dict(num_chs=dim, reduction=curr_stride, module=f"levels.{i}")]
388
+ prev_dim = dim
389
+ curr_stride *= 2
390
+
391
+ self.levels = nn.ModuleList([levels[i] for i in range(num_levels)])
392
+
393
+ # Final normalization layer
394
+ self.norm = norm_layer(embed_dims[-1])
395
+
396
+ self.init_weights(weight_init)
397
+
398
+ def init_weights(self, mode=""):
399
+ assert mode in ("nlhb", "")
400
+ head_bias = -math.log(self.num_classes) if "nlhb" in mode else 0.0
401
+ for level in self.levels:
402
+ trunc_normal_(level.pos_embed, std=0.02, a=-2, b=2)
403
+ named_apply(partial(_init_nest_weights, head_bias=head_bias), self)
404
+
405
+ @torch.jit.ignore
406
+ def no_weight_decay(self):
407
+ return {f"level.{i}.pos_embed" for i in range(len(self.levels))}
408
+
409
+ def get_classifier(self):
410
+ return self.head
411
+
412
+ def forward_features(self, x):
413
+ """x shape (B, C, D, H, W)"""
414
+ x = self.patch_embed(x)
415
+
416
+ hidden_states_out = [x]
417
+
418
+ for _, level in enumerate(self.levels):
419
+ x = level(x)
420
+ hidden_states_out.append(x)
421
+ # Layer norm done over channel dim only (to NDHWC and back)
422
+ x = self.norm(x.permute(0, 2, 3, 4, 1)).permute(0, 4, 1, 2, 3)
423
+ return x, hidden_states_out
424
+
425
+ def forward(self, x):
426
+ """x shape (B, C, D, H, W)"""
427
+ x, hidden_states_out = self.forward_features(x)
428
+
429
+ if self.drop_rate > 0.0:
430
+ x = F.dropout(x, p=self.drop_rate, training=self.training)
431
+ return x, hidden_states_out
432
+
433
+
434
+ def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module:
435
+ if not depth_first and include_root:
436
+ fn(module=module, name=name)
437
+ for child_name, child_module in module.named_children():
438
+ child_name = ".".join((name, child_name)) if name else child_name
439
+ named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True)
440
+ if depth_first and include_root:
441
+ fn(module=module, name=name)
442
+ return module
443
+
444
+
445
+ def _init_nest_weights(module: nn.Module, name: str = "", head_bias: float = 0.0):
446
+ """NesT weight initialization
447
+ Can replicate Jax implementation. Otherwise follows vision_transformer.py
448
+ """
449
+ if isinstance(module, nn.Linear):
450
+ if name.startswith("head"):
451
+ trunc_normal_(module.weight, std=0.02, a=-2, b=2)
452
+ nn.init.constant_(module.bias, head_bias)
453
+ else:
454
+ trunc_normal_(module.weight, std=0.02, a=-2, b=2)
455
+ if module.bias is not None:
456
+ nn.init.zeros_(module.bias)
457
+ elif isinstance(module, (nn.Conv2d, nn.Conv3d)):
458
+ trunc_normal_(module.weight, std=0.02, a=-2, b=2)
459
+ if module.bias is not None:
460
+ nn.init.zeros_(module.bias)
461
+ elif isinstance(module, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
462
+ nn.init.zeros_(module.bias)
463
+ nn.init.ones_(module.weight)
464
+
465
+
466
+ def resize_pos_embed(posemb, posemb_new):
467
+ """
468
+ Rescale the grid of position embeddings when loading from state_dict
469
+ Expected shape of position embeddings is (1, T, N, C), and considers only square images
470
+ """
471
+ _logger.info("Resized position embedding: %s to %s", posemb.shape, posemb_new.shape)
472
+ seq_length_old = posemb.shape[2]
473
+ num_blocks_new, seq_length_new = posemb_new.shape[1:3]
474
+ size_new = int(math.sqrt(num_blocks_new * seq_length_new))
475
+ # First change to (1, C, H, W)
476
+ posemb = deblockify(posemb, int(math.sqrt(seq_length_old))).permute(0, 3, 1, 2)
477
+ posemb = F.interpolate(posemb, size=[size_new, size_new], mode="bicubic", align_corners=False)
478
+ # Now change to new (1, T, N, C)
479
+ posemb = blockify(posemb.permute(0, 2, 3, 1), int(math.sqrt(seq_length_new)))
480
+ return posemb
481
+
482
+
483
+ def checkpoint_filter_fn(state_dict, model):
484
+ """resize positional embeddings of pretrained weights"""
485
+ pos_embed_keys = [k for k in state_dict.keys() if k.startswith("pos_embed_")]
486
+ for k in pos_embed_keys:
487
+ if state_dict[k].shape != getattr(model, k).shape:
488
+ state_dict[k] = resize_pos_embed(state_dict[k], getattr(model, k))
489
+ return state_dict
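A small sketch of the backbone and the blockify/deblockify helpers. The tiny configuration below is an assumption for illustration only; it is not this bundle's configuration (the segmentation model further down instantiates the backbone with img_size=96, patch_size=2 and embed_dims=(128, 256, 512)):

import torch

from scripts.networks.nest_transformer_3D import NestTransformer3D, blockify, deblockify

# Tiny illustrative configuration: 48^3 input, 4^3 patches, two hierarchy levels.
net = NestTransformer3D(
    img_size=48, in_chans=1, patch_size=4, num_levels=2,
    embed_dims=(64, 128), num_heads=(2, 4), depths=(1, 1),
)
net.eval()

with torch.no_grad():
    feats, hidden = net(torch.randn(1, 1, 48, 48, 48))
print(feats.shape)                # torch.Size([1, 128, 6, 6, 6])
print([h.shape for h in hidden])  # patch-embedding output followed by each level's output

# blockify/deblockify are inverses for cubic inputs whose edge is divisible by block_size.
x = torch.randn(2, 12, 12, 12, 32)  # (B, D, H, W, C)
assert torch.equal(deblockify(blockify(x, 6), 6), x)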
scripts/networks/patchEmbed3D.py ADDED
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020 - 2021 MONAI Consortium
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+
15
+ import math
16
+ from typing import Sequence, Tuple, Union
17
+
18
+ import torch
19
+ import torch.nn as nn
20
+ import torch.nn.functional as F
21
+ from monai.utils import optional_import
22
+
23
+ Rearrange, _ = optional_import("einops.layers.torch", name="Rearrange")
24
+
25
+
26
+ class PatchEmbeddingBlock(nn.Module):
27
+ """
28
+ A patch embedding block, based on: "Dosovitskiy et al.,
29
+ An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>"
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ in_channels: int,
35
+ img_size: Tuple[int, int, int],
36
+ patch_size: Tuple[int, int, int],
37
+ hidden_size: int,
38
+ num_heads: int,
39
+ pos_embed: str,
40
+ dropout_rate: float = 0.0,
41
+ ) -> None:
42
+ """
43
+ Args:
44
+ in_channels: dimension of input channels.
45
+ img_size: dimension of input image.
46
+ patch_size: dimension of patch size.
47
+ hidden_size: dimension of hidden layer.
48
+ num_heads: number of attention heads.
49
+ pos_embed: position embedding layer type.
50
+ dropout_rate: faction of the input units to drop.
51
+
52
+ """
53
+
54
+ super().__init__()
55
+
56
+ if not (0 <= dropout_rate <= 1):
57
+ raise AssertionError("dropout_rate should be between 0 and 1.")
58
+
59
+ if hidden_size % num_heads != 0:
60
+ raise AssertionError("hidden size should be divisible by num_heads.")
61
+
62
+ for m, p in zip(img_size, patch_size):
63
+ if m < p:
64
+ raise AssertionError("patch_size should be smaller than img_size.")
65
+
66
+ if pos_embed not in ["conv", "perceptron"]:
67
+ raise KeyError(f"Position embedding layer of type {pos_embed} is not supported.")
68
+
69
+ if pos_embed == "perceptron":
70
+ if img_size[0] % patch_size[0] != 0:
71
+ raise AssertionError("img_size should be divisible by patch_size for perceptron patch embedding.")
72
+
73
+ self.n_patches = (
74
+ (img_size[0] // patch_size[0]) * (img_size[1] // patch_size[1]) * (img_size[2] // patch_size[2])
75
+ )
76
+ self.patch_dim = in_channels * patch_size[0] * patch_size[1] * patch_size[2]
77
+
78
+ self.pos_embed = pos_embed
79
+ self.patch_embeddings: Union[nn.Conv3d, nn.Sequential]
80
+ if self.pos_embed == "conv":
81
+ self.patch_embeddings = nn.Conv3d(
82
+ in_channels=in_channels, out_channels=hidden_size, kernel_size=patch_size, stride=patch_size
83
+ )
84
+ elif self.pos_embed == "perceptron":
85
+ self.patch_embeddings = nn.Sequential(
86
+ Rearrange(
87
+ "b c (h p1) (w p2) (d p3)-> b (h w d) (p1 p2 p3 c)",
88
+ p1=patch_size[0],
89
+ p2=patch_size[1],
90
+ p3=patch_size[2],
91
+ ),
92
+ nn.Linear(self.patch_dim, hidden_size),
93
+ )
94
+ self.position_embeddings = nn.Parameter(torch.zeros(1, self.n_patches, hidden_size))
95
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, hidden_size))
96
+ self.dropout = nn.Dropout(dropout_rate)
97
+ self.trunc_normal_(self.position_embeddings, mean=0.0, std=0.02, a=-2.0, b=2.0)
98
+ self.apply(self._init_weights)
99
+
100
+ def _init_weights(self, m):
101
+ if isinstance(m, nn.Linear):
102
+ self.trunc_normal_(m.weight, mean=0.0, std=0.02, a=-2.0, b=2.0)
103
+ if isinstance(m, nn.Linear) and m.bias is not None:
104
+ nn.init.constant_(m.bias, 0)
105
+ elif isinstance(m, nn.LayerNorm):
106
+ nn.init.constant_(m.bias, 0)
107
+ nn.init.constant_(m.weight, 1.0)
108
+
109
+ def trunc_normal_(self, tensor, mean, std, a, b):
110
+ # From PyTorch official master until it's in a few official releases - RW
111
+ # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
112
+ def norm_cdf(x):
113
+ return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0
114
+
115
+ with torch.no_grad():
116
+ l = norm_cdf((a - mean) / std)
117
+ u = norm_cdf((b - mean) / std)
118
+ tensor.uniform_(2 * l - 1, 2 * u - 1)
119
+ tensor.erfinv_()
120
+ tensor.mul_(std * math.sqrt(2.0))
121
+ tensor.add_(mean)
122
+ tensor.clamp_(min=a, max=b)
123
+ return tensor
124
+
125
+ def forward(self, x):
126
+ if self.pos_embed == "conv":
127
+ x = self.patch_embeddings(x)
128
+ x = x.flatten(2)
129
+ x = x.transpose(-1, -2)
130
+ elif self.pos_embed == "perceptron":
131
+ x = self.patch_embeddings(x)
132
+ embeddings = x + self.position_embeddings
133
+ embeddings = self.dropout(embeddings)
134
+ return embeddings
135
+
136
+
137
+ class PatchEmbed3D(nn.Module):
138
+ """Video to Patch Embedding.
139
+
140
+ Args:
141
+ patch_size (int): Patch token size. Default: (2,4,4).
142
+ in_chans (int): Number of input video channels. Default: 3.
143
+ embed_dim (int): Number of linear projection output channels. Default: 96.
144
+ norm_layer (nn.Module, optional): Normalization layer. Default: None
145
+ """
146
+
147
+ def __init__(
148
+ self,
149
+ img_size: Sequence[int] = (96, 96, 96),
150
+ patch_size=(4, 4, 4),
151
+ in_chans: int = 1,
152
+ embed_dim: int = 96,
153
+ norm_layer=None,
154
+ ):
155
+ super().__init__()
156
+ self.patch_size = patch_size
157
+
158
+ self.in_chans = in_chans
159
+ self.embed_dim = embed_dim
160
+
161
+ self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1], img_size[2] // patch_size[2])
162
+ self.num_patches = self.grid_size[0] * self.grid_size[1] * self.grid_size[2]
163
+
164
+ self.proj = nn.Conv3d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
165
+
166
+ if norm_layer is not None:
167
+ self.norm = norm_layer(embed_dim)
168
+ else:
169
+ self.norm = None
170
+
171
+ def forward(self, x):
172
+ """Forward function."""
173
+ # padding
174
+ _, _, d, h, w = x.size()
175
+ if w % self.patch_size[2] != 0:
176
+ x = F.pad(x, (0, self.patch_size[2] - w % self.patch_size[2]))
177
+ if h % self.patch_size[1] != 0:
178
+ x = F.pad(x, (0, 0, 0, self.patch_size[1] - h % self.patch_size[1]))
179
+ if d % self.patch_size[0] != 0:
180
+ x = F.pad(x, (0, 0, 0, 0, 0, self.patch_size[0] - d % self.patch_size[0]))
181
+
182
+ x = self.proj(x) # B C D Wh Ww
183
+ if self.norm is not None:
184
+ d, wh, ww = x.size(2), x.size(3), x.size(4)
185
+ x = x.flatten(2).transpose(1, 2)
186
+ x = self.norm(x)
187
+ x = x.transpose(1, 2).view(-1, self.embed_dim, d, wh, ww)
188
+ # pdb.set_trace()
189
+
190
+ return x
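A short sketch of PatchEmbed3D using its defaults, showing the patch projection and the right-padding applied when an input edge is not a multiple of the patch size; shapes are illustrative:

import torch

from scripts.networks.patchEmbed3D import PatchEmbed3D

# Projects non-overlapping 4x4x4 patches of a single-channel volume to a 96-dim embedding.
embed = PatchEmbed3D(img_size=(96, 96, 96), patch_size=(4, 4, 4), in_chans=1, embed_dim=96)

with torch.no_grad():
    y = embed(torch.randn(1, 1, 96, 96, 96))
print(y.shape)  # torch.Size([1, 96, 24, 24, 24]) -- (B, embed_dim, D/4, H/4, W/4)

# Edges that are not multiples of the patch size are zero-padded before projection.
with torch.no_grad():
    y = embed(torch.randn(1, 1, 95, 96, 94))
print(y.shape)  # torch.Size([1, 96, 24, 24, 24])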
scripts/networks/unest_base_patch_4.py ADDED
@@ -0,0 +1,249 @@
1
+ #!/usr/bin/env python3
2
+ # limitations under the License.
3
+ """
4
+ The 3D NEST transformer based segmentation model
5
+
6
+ MASI Lab, Vanderbilt University
7
+
8
+
9
+ Authors: Xin Yu, Yinchi Zhou, Yucheng Tang, Bennett Landman
10
+
11
+
12
+ The NEST code is partly from
13
+
14
+ Nested Hierarchical Transformer: Towards Accurate, Data-Efficient and
15
+ Interpretable Visual Understanding
16
+ https://arxiv.org/pdf/2105.12723.pdf
17
+
18
+ """
19
+ from typing import Sequence, Tuple, Union
20
+
21
+ import torch
22
+ import torch.nn as nn
23
+ from monai.networks.blocks import Convolution
24
+ from monai.networks.blocks.dynunet_block import UnetOutBlock
25
+ from scripts.networks.nest_transformer_3D import NestTransformer3D
26
+ from scripts.networks.unest_block import UNesTBlock, UNesTConvBlock, UNestUpBlock
27
+
28
+
29
+ class UNesT(nn.Module):
30
+ """
31
+ UNesT model implementation
32
+ """
33
+
34
+ def __init__(
35
+ self,
36
+ in_channels: int,
37
+ out_channels: int,
38
+ img_size: Sequence[int] = (96, 96, 96),
39
+ feature_size: int = 16,
40
+ patch_size: int = 2,
41
+ depths: Sequence[int] = (2, 2, 2, 2),
42
+ num_heads: Sequence[int] = (3, 6, 12, 24),
43
+ embed_dim: Sequence[int] = (128, 256, 512),
44
+ window_size: Sequence[int] = (7, 7, 7),
45
+ norm_name: Union[Tuple, str] = "instance",
46
+ conv_block: bool = False,
47
+ res_block: bool = True,
48
+ dropout_rate: float = 0.0,
49
+ ) -> None:
50
+ """
51
+ Args:
52
+ in_channels: dimension of input channels.
53
+ out_channels: dimension of output channels.
54
+ img_size: dimension of input image.
55
+ feature_size: dimension of network feature size.
56
+ hidden_size: dimension of hidden layer.
57
+ mlp_dim: dimension of feedforward layer.
58
+ num_heads: number of attention heads.
59
+ pos_embed: position embedding layer type.
60
+ norm_name: feature normalization type and arguments.
61
+ conv_block: bool argument to determine if convolutional block is used.
62
+ res_block: bool argument to determine if residual block is used.
63
+ dropout_rate: faction of the input units to drop.
64
+
65
+ Examples:
66
+
67
+ # for single channel input 4-channel output with patch size of (96,96,96), feature size of 32 and batch norm
68
+ >>> net = UNETR(in_channels=1, out_channels=4, img_size=(96,96,96), feature_size=32, norm_name='batch')
69
+
70
+ # for 4-channel input 3-channel output with patch size of (128,128,128), conv position embedding and instance norm
71
+ >>> net = UNETR(in_channels=4, out_channels=3, img_size=(128,128,128), pos_embed='conv', norm_name='instance')
72
+
73
+ """
74
+
75
+ super().__init__()
76
+ if not (0 <= dropout_rate <= 1):
77
+ raise AssertionError("dropout_rate should be between 0 and 1.")
78
+ self.embed_dim = embed_dim
79
+ self.nestViT = NestTransformer3D(
80
+ img_size=96,
81
+ in_chans=1,
82
+ patch_size=patch_size,
83
+ num_levels=3,
84
+ embed_dims=embed_dim,
85
+ num_heads=num_heads,
86
+ depths=depths,
87
+ num_classes=1000,
88
+ mlp_ratio=4.0,
89
+ qkv_bias=True,
90
+ drop_rate=0.0,
91
+ attn_drop_rate=0.0,
92
+ drop_path_rate=0.5,
93
+ norm_layer=None,
94
+ act_layer=None,
95
+ pad_type="",
96
+ weight_init="",
97
+ global_pool="avg",
98
+ )
99
+ self.encoder1 = UNesTConvBlock(
100
+ spatial_dims=3,
101
+ in_channels=1,
102
+ out_channels=feature_size * 2,
103
+ kernel_size=3,
104
+ stride=1,
105
+ norm_name=norm_name,
106
+ res_block=res_block,
107
+ )
108
+ self.encoder2 = UNestUpBlock(
109
+ spatial_dims=3,
110
+ in_channels=self.embed_dim[0],
111
+ out_channels=feature_size * 4,
112
+ num_layer=1,
113
+ kernel_size=3,
114
+ stride=1,
115
+ upsample_kernel_size=2,
116
+ norm_name=norm_name,
117
+ conv_block=False,
118
+ res_block=False,
119
+ )
120
+
121
+ self.encoder3 = UNesTConvBlock(
122
+ spatial_dims=3,
123
+ in_channels=self.embed_dim[0],
124
+ out_channels=8 * feature_size,
125
+ kernel_size=3,
126
+ stride=1,
127
+ norm_name=norm_name,
128
+ res_block=res_block,
129
+ )
130
+ self.encoder4 = UNesTConvBlock(
131
+ spatial_dims=3,
132
+ in_channels=self.embed_dim[1],
133
+ out_channels=16 * feature_size,
134
+ kernel_size=3,
135
+ stride=1,
136
+ norm_name=norm_name,
137
+ res_block=res_block,
138
+ )
139
+ self.decoder5 = UNesTBlock(
140
+ spatial_dims=3,
141
+ in_channels=2 * self.embed_dim[2],
142
+ out_channels=feature_size * 32,
143
+ stride=1,
144
+ kernel_size=3,
145
+ upsample_kernel_size=2,
146
+ norm_name=norm_name,
147
+ res_block=res_block,
148
+ )
149
+ self.decoder4 = UNesTBlock(
150
+ spatial_dims=3,
151
+ in_channels=self.embed_dim[2],
152
+ out_channels=feature_size * 16,
153
+ stride=1,
154
+ kernel_size=3,
155
+ upsample_kernel_size=2,
156
+ norm_name=norm_name,
157
+ res_block=res_block,
158
+ )
159
+ self.decoder3 = UNesTBlock(
160
+ spatial_dims=3,
161
+ in_channels=feature_size * 16,
162
+ out_channels=feature_size * 8,
163
+ stride=1,
164
+ kernel_size=3,
165
+ upsample_kernel_size=2,
166
+ norm_name=norm_name,
167
+ res_block=res_block,
168
+ )
169
+ self.decoder2 = UNesTBlock(
170
+ spatial_dims=3,
171
+ in_channels=feature_size * 8,
172
+ out_channels=feature_size * 4,
173
+ stride=1,
174
+ kernel_size=3,
175
+ upsample_kernel_size=2,
176
+ norm_name=norm_name,
177
+ res_block=res_block,
178
+ )
179
+ self.decoder1 = UNesTBlock(
180
+ spatial_dims=3,
181
+ in_channels=feature_size * 4,
182
+ out_channels=feature_size * 2,
183
+ stride=1,
184
+ kernel_size=3,
185
+ upsample_kernel_size=2,
186
+ norm_name=norm_name,
187
+ res_block=res_block,
188
+ )
189
+ self.encoder10 = Convolution(
190
+ spatial_dims=3,
191
+ in_channels=32 * feature_size,
192
+ out_channels=64 * feature_size,
193
+ strides=2,
194
+ adn_ordering="ADN",
195
+ dropout=0.0,
196
+ )
197
+ self.out = UnetOutBlock(spatial_dims=3, in_channels=feature_size * 2, out_channels=out_channels) # type: ignore
198
+
199
+ def proj_feat(self, x, hidden_size, feat_size):
200
+ x = x.view(x.size(0), feat_size[0], feat_size[1], feat_size[2], hidden_size)
201
+ x = x.permute(0, 4, 1, 2, 3).contiguous()
202
+ return x
203
+
204
+ def load_from(self, weights):
205
+ with torch.no_grad():
206
+ # copy weights from patch embedding
207
+ for i in weights["state_dict"]:
208
+ print(i)
209
+ self.vit.patch_embedding.position_embeddings.copy_(
210
+ weights["state_dict"]["module.transformer.patch_embedding.position_embeddings_3d"]
211
+ )
212
+ self.vit.patch_embedding.cls_token.copy_(
213
+ weights["state_dict"]["module.transformer.patch_embedding.cls_token"]
214
+ )
215
+ self.vit.patch_embedding.patch_embeddings[1].weight.copy_(
216
+ weights["state_dict"]["module.transformer.patch_embedding.patch_embeddings_3d.1.weight"]
217
+ )
218
+ self.vit.patch_embedding.patch_embeddings[1].bias.copy_(
219
+ weights["state_dict"]["module.transformer.patch_embedding.patch_embeddings_3d.1.bias"]
220
+ )
221
+
222
+ # copy weights from encoding blocks (default: num of blocks: 12)
223
+ for bname, block in self.vit.blocks.named_children():
224
+ print(block)
225
+ block.loadFrom(weights, n_block=bname)
226
+ # last norm layer of transformer
227
+ self.vit.norm.weight.copy_(weights["state_dict"]["module.transformer.norm.weight"])
228
+ self.vit.norm.bias.copy_(weights["state_dict"]["module.transformer.norm.bias"])
229
+
230
+ def forward(self, x_in):
231
+ x, hidden_states_out = self.nestViT(x_in)
232
+ enc0 = self.encoder1(x_in) # 2, 32, 96, 96, 96
233
+ x1 = hidden_states_out[0] # 2, 128, 24, 24, 24 2, 128, 12, 12, 12
234
+ enc1 = self.encoder2(x1) # 2, 64, 48, 48, 48 torch.Size([2, 64, 24, 24, 24])
235
+ x2 = hidden_states_out[1] # 2, 128, 24, 24, 24
236
+ enc2 = self.encoder3(x2) # 2, 128, 24, 24, 24 torch.Size([2, 128, 12, 12, 12])
237
+ x3 = hidden_states_out[2] # 2, 256, 12, 12, 12 torch.Size([2, 256, 6, 6, 6])
238
+ enc3 = self.encoder4(x3) # 2, 256, 12, 12, 12 torch.Size([2, 256, 6, 6, 6])
239
+ x4 = hidden_states_out[3]
240
+ enc4 = x4 # 2, 512, 6, 6, 6 torch.Size([2, 512, 3, 3, 3])
241
+ dec4 = x # 2, 512, 6, 6, 6 torch.Size([2, 512, 3, 3, 3])
242
+ dec4 = self.encoder10(dec4) # 2, 1024, 3, 3, 3 torch.Size([2, 1024, 2, 2, 2])
243
+ dec3 = self.decoder5(dec4, enc4) # 2, 512, 6, 6, 6
244
+ dec2 = self.decoder4(dec3, enc3) # 2, 256, 12, 12, 12
245
+ dec1 = self.decoder3(dec2, enc2) # 2, 128, 24, 24, 24
246
+ dec0 = self.decoder2(dec1, enc1) # 2, 64, 48, 48, 48
247
+ out = self.decoder1(dec0, enc0) # 2, 32, 96, 96, 96
248
+ logits = self.out(out)
249
+ return logits
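An illustrative construction of UNesT. num_heads and depths must be length-3 sequences to match the three backbone levels (the length-4 class defaults would fail the backbone's length check), and out_channels below is a placeholder; the values actually used by this bundle are defined in its config files, not here:

import torch

from scripts.networks.unest_base_patch_4 import UNesT

net = UNesT(
    in_channels=1,
    out_channels=133,          # placeholder label count for illustration
    img_size=(96, 96, 96),
    num_heads=(4, 8, 16),      # illustrative; must have length 3
    depths=(2, 2, 2),          # illustrative; must have length 3
    embed_dim=(128, 256, 512),
)
n_params = sum(p.numel() for p in net.parameters())
print(f"{n_params / 1e6:.1f}M parameters")

# The transformer backbone is hard-coded to a 96^3 input, so whole-brain volumes are
# typically segmented with sliding-window inference over 96x96x96 ROIs.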
scripts/networks/unest_block.py ADDED
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from typing import Sequence, Tuple, Union
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ from monai.networks.blocks.dynunet_block import UnetBasicBlock, UnetResBlock, get_conv_layer
8
+
9
+
10
+ class UNesTBlock(nn.Module):
11
+ """ """
12
+
13
+ def __init__(
14
+ self,
15
+ spatial_dims: int,
16
+ in_channels: int,
17
+ out_channels: int, # type: ignore
18
+ kernel_size: Union[Sequence[int], int],
19
+ stride: Union[Sequence[int], int],
20
+ upsample_kernel_size: Union[Sequence[int], int],
21
+ norm_name: Union[Tuple, str],
22
+ res_block: bool = False,
23
+ ) -> None:
24
+ """
25
+ Args:
26
+ spatial_dims: number of spatial dimensions.
27
+ in_channels: number of input channels.
28
+ out_channels: number of output channels.
29
+ kernel_size: convolution kernel size.
30
+ stride: convolution stride.
31
+ upsample_kernel_size: convolution kernel size for transposed convolution layers.
32
+ norm_name: feature normalization type and arguments.
33
+ res_block: bool argument to determine if residual block is used.
34
+
35
+ """
36
+
37
+ super(UNesTBlock, self).__init__()
38
+ upsample_stride = upsample_kernel_size
39
+ self.transp_conv = get_conv_layer(
40
+ spatial_dims,
41
+ in_channels,
42
+ out_channels,
43
+ kernel_size=upsample_kernel_size,
44
+ stride=upsample_stride,
45
+ conv_only=True,
46
+ is_transposed=True,
47
+ )
48
+
49
+ if res_block:
50
+ self.conv_block = UnetResBlock(
51
+ spatial_dims,
52
+ out_channels + out_channels,
53
+ out_channels,
54
+ kernel_size=kernel_size,
55
+ stride=1,
56
+ norm_name=norm_name,
57
+ )
58
+ else:
59
+ self.conv_block = UnetBasicBlock( # type: ignore
60
+ spatial_dims,
61
+ out_channels + out_channels,
62
+ out_channels,
63
+ kernel_size=kernel_size,
64
+ stride=1,
65
+ norm_name=norm_name,
66
+ )
67
+
68
+ def forward(self, inp, skip):
69
+ # number of channels for skip should equals to out_channels
70
+ out = self.transp_conv(inp)
71
+ # print(out.shape)
72
+ # print(skip.shape)
73
+ out = torch.cat((out, skip), dim=1)
74
+ out = self.conv_block(out)
75
+ return out
76
+
77
+
78
+ class UNestUpBlock(nn.Module):
79
+ """ """
80
+
81
+ def __init__(
82
+ self,
83
+ spatial_dims: int,
84
+ in_channels: int,
85
+ out_channels: int,
86
+ num_layer: int,
87
+ kernel_size: Union[Sequence[int], int],
88
+ stride: Union[Sequence[int], int],
89
+ upsample_kernel_size: Union[Sequence[int], int],
90
+ norm_name: Union[Tuple, str],
91
+ conv_block: bool = False,
92
+ res_block: bool = False,
93
+ ) -> None:
94
+ """
95
+ Args:
96
+ spatial_dims: number of spatial dimensions.
97
+ in_channels: number of input channels.
98
+ out_channels: number of output channels.
99
+ num_layer: number of upsampling blocks.
100
+ kernel_size: convolution kernel size.
101
+ stride: convolution stride.
102
+ upsample_kernel_size: convolution kernel size for transposed convolution layers.
103
+ norm_name: feature normalization type and arguments.
104
+ conv_block: bool argument to determine if convolutional block is used.
105
+ res_block: bool argument to determine if residual block is used.
106
+
107
+ """
108
+
109
+ super().__init__()
110
+
111
+ upsample_stride = upsample_kernel_size
112
+ self.transp_conv_init = get_conv_layer(
113
+ spatial_dims,
114
+ in_channels,
115
+ out_channels,
116
+ kernel_size=upsample_kernel_size,
117
+ stride=upsample_stride,
118
+ conv_only=True,
119
+ is_transposed=True,
120
+ )
121
+ if conv_block:
122
+ if res_block:
123
+ self.blocks = nn.ModuleList(
124
+ [
125
+ nn.Sequential(
126
+ get_conv_layer(
127
+ spatial_dims,
128
+ out_channels,
129
+ out_channels,
130
+ kernel_size=upsample_kernel_size,
131
+ stride=upsample_stride,
132
+ conv_only=True,
133
+ is_transposed=True,
134
+ ),
135
+ UnetResBlock(
136
+ spatial_dims=3,
137
+ in_channels=out_channels,
138
+ out_channels=out_channels,
139
+ kernel_size=kernel_size,
140
+ stride=stride,
141
+ norm_name=norm_name,
142
+ ),
143
+ )
144
+ for i in range(num_layer)
145
+ ]
146
+ )
147
+ else:
148
+ self.blocks = nn.ModuleList(
149
+ [
150
+ nn.Sequential(
151
+ get_conv_layer(
152
+ spatial_dims,
153
+ out_channels,
154
+ out_channels,
155
+ kernel_size=upsample_kernel_size,
156
+ stride=upsample_stride,
157
+ conv_only=True,
158
+ is_transposed=True,
159
+ ),
160
+ UnetBasicBlock(
161
+ spatial_dims=3,
162
+ in_channels=out_channels,
163
+ out_channels=out_channels,
164
+ kernel_size=kernel_size,
165
+ stride=stride,
166
+ norm_name=norm_name,
167
+ ),
168
+ )
169
+ for i in range(num_layer)
170
+ ]
171
+ )
172
+ else:
173
+ self.blocks = nn.ModuleList(
174
+ [
175
+ get_conv_layer(
176
+ spatial_dims,
177
+ out_channels,
178
+ out_channels,
179
+ kernel_size=1,
180
+ stride=1,
181
+ conv_only=True,
182
+ is_transposed=True,
183
+ )
184
+ for i in range(num_layer)
185
+ ]
186
+ )
187
+
188
+ def forward(self, x):
189
+ x = self.transp_conv_init(x)
190
+ for blk in self.blocks:
191
+ x = blk(x)
192
+ return x
193
+
194
+
195
+ class UNesTConvBlock(nn.Module):
196
+ """
197
+ UNesT block with skip connections
198
+ """
199
+
200
+ def __init__(
201
+ self,
202
+ spatial_dims: int,
203
+ in_channels: int,
204
+ out_channels: int,
205
+ kernel_size: Union[Sequence[int], int],
206
+ stride: Union[Sequence[int], int],
207
+ norm_name: Union[Tuple, str],
208
+ res_block: bool = False,
209
+ ) -> None:
210
+ """
211
+ Args:
212
+ spatial_dims: number of spatial dimensions.
213
+ in_channels: number of input channels.
214
+ out_channels: number of output channels.
215
+ kernel_size: convolution kernel size.
216
+ stride: convolution stride.
217
+ norm_name: feature normalization type and arguments.
218
+ res_block: bool argument to determine if residual block is used.
219
+
220
+ """
221
+
222
+ super().__init__()
223
+
224
+ if res_block:
225
+ self.layer = UnetResBlock(
226
+ spatial_dims=spatial_dims,
227
+ in_channels=in_channels,
228
+ out_channels=out_channels,
229
+ kernel_size=kernel_size,
230
+ stride=stride,
231
+ norm_name=norm_name,
232
+ )
233
+ else:
234
+ self.layer = UnetBasicBlock( # type: ignore
235
+ spatial_dims=spatial_dims,
236
+ in_channels=in_channels,
237
+ out_channels=out_channels,
238
+ kernel_size=kernel_size,
239
+ stride=stride,
240
+ norm_name=norm_name,
241
+ )
242
+
243
+ def forward(self, inp):
244
+ out = self.layer(inp)
245
+ return out
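A minimal sketch of the blocks above with arbitrary example shapes. The skip tensor passed to UNesTBlock must already have out_channels channels and twice the spatial size of the low-resolution input:

import torch

from scripts.networks.unest_block import UNesTBlock, UNesTConvBlock

# Decoder block: transposed-conv upsampling by 2, concatenation with the skip, then convolution.
blk = UNesTBlock(
    spatial_dims=3, in_channels=64, out_channels=32,
    kernel_size=3, stride=1, upsample_kernel_size=2,
    norm_name="instance", res_block=True,
)
x = torch.randn(1, 64, 8, 8, 8)         # low-resolution decoder feature
skip = torch.randn(1, 32, 16, 16, 16)   # encoder skip at twice the resolution
with torch.no_grad():
    print(blk(x, skip).shape)  # torch.Size([1, 32, 16, 16, 16])

# Plain encoder block applied to the raw input and intermediate features.
enc = UNesTConvBlock(
    spatial_dims=3, in_channels=1, out_channels=32,
    kernel_size=3, stride=1, norm_name="instance", res_block=True,
)
with torch.no_grad():
    print(enc(torch.randn(1, 1, 16, 16, 16)).shape)  # torch.Size([1, 32, 16, 16, 16])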