Commit ee8cb8c by harry900000 · 1 parent: e875314

pip install cosmos-transfer1

This view is limited to 50 files because the commit contains too many changes.

Files changed (50):
  1. app.py +4 -20
  2. cosmos-transfer1/.flake8 +0 -10
  3. cosmos-transfer1/.gitignore +0 -242
  4. cosmos-transfer1/.pre-commit-config.yaml +0 -55
  5. cosmos-transfer1/ATTRIBUTIONS.md +0 -1661
  6. cosmos-transfer1/CONTRIBUTING.md +0 -51
  7. cosmos-transfer1/Dockerfile +0 -47
  8. cosmos-transfer1/INSTALL.md +0 -88
  9. cosmos-transfer1/LICENSE +0 -201
  10. cosmos-transfer1/README.md +0 -102
  11. cosmos-transfer1/checkpoints/README.md +0 -3
  12. cosmos-transfer1/cosmos-transfer1.yaml +0 -30
  13. cosmos-transfer1/cosmos_transfer1/auxiliary/depth_anything/inference/__init__.py +0 -0
  14. cosmos-transfer1/cosmos_transfer1/auxiliary/depth_anything/inference/depth_anything_pipeline.py +0 -55
  15. cosmos-transfer1/cosmos_transfer1/auxiliary/depth_anything/model/__init__.py +0 -0
  16. cosmos-transfer1/cosmos_transfer1/auxiliary/depth_anything/model/depth_anything.py +0 -151
  17. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/README.md +0 -17
  18. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/__init__.py +0 -14
  19. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/aegis/__init__.py +0 -14
  20. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/aegis/aegis.py +0 -135
  21. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/aegis/categories.py +0 -192
  22. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/blocklist/__init__.py +0 -14
  23. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/blocklist/blocklist.py +0 -216
  24. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/blocklist/utils.py +0 -45
  25. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/common/__init__.py +0 -0
  26. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/common/core.py +0 -71
  27. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/common/io_utils.py +0 -78
  28. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/common/presets.py +0 -75
  29. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/face_blur_filter/__init__.py +0 -14
  30. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/face_blur_filter/blur_utils.py +0 -35
  31. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/face_blur_filter/face_blur_filter.py +0 -225
  32. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/face_blur_filter/retinaface_utils.py +0 -117
  33. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/llamaGuard3/__init__.py +0 -14
  34. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/llamaGuard3/categories.py +0 -31
  35. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/llamaGuard3/llamaGuard3.py +0 -122
  36. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/video_content_safety_filter/__init__.py +0 -14
  37. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/video_content_safety_filter/model.py +0 -60
  38. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/video_content_safety_filter/video_content_safety_filter.py +0 -185
  39. cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/video_content_safety_filter/vision_encoder.py +0 -46
  40. cosmos-transfer1/cosmos_transfer1/auxiliary/human_keypoint/human_keypoint.py +0 -155
  41. cosmos-transfer1/cosmos_transfer1/auxiliary/robot_augmentation/README.md +0 -112
  42. cosmos-transfer1/cosmos_transfer1/auxiliary/robot_augmentation/spatial_temporal_weight.py +0 -577
  43. cosmos-transfer1/cosmos_transfer1/auxiliary/sam2/sam2_model.py +0 -392
  44. cosmos-transfer1/cosmos_transfer1/auxiliary/sam2/sam2_pipeline.py +0 -126
  45. cosmos-transfer1/cosmos_transfer1/auxiliary/sam2/sam2_utils.py +0 -168
  46. cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/__init__.py +0 -14
  47. cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/image_cli.py +0 -188
  48. cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/image_lib.py +0 -124
  49. cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/utils.py +0 -402
  50. cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/video_cli.py +0 -210
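
Apart from `app.py`, every path in the list above is a pure deletion (`+0`): the commit removes the vendored `cosmos-transfer1/` checkout and, per the commit title, relies on installing the project as a package instead. Below is a minimal sketch of that switch from the app's point of view; the `pip install cosmos-transfer1` spelling is an assumption taken from the commit title, and the actual install step (e.g. a `requirements.txt` entry) is not visible in this 50-file view:

```python
import os
import sys

PWD = os.path.dirname(__file__)

# Before this commit: make the vendored checkout importable at runtime.
# sys.path.append(os.path.join(PWD, "cosmos-transfer1"))

# After this commit: the package is assumed to be installed ahead of time
# (e.g. `pip install cosmos-transfer1`), so the import resolves from
# site-packages without any path manipulation.
import cosmos_transfer1  # package name taken from the cosmos_transfer1/ source tree
```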
app.py CHANGED
@@ -1,11 +1,8 @@
 import os
-import sys
 from typing import List, Tuple

 PWD = os.path.dirname(__file__)

-sys.path.append(os.path.join(PWD, "cosmos-transfer1"))
-
 import subprocess

 subprocess.run("pip install flash-attn --no-build-isolation", env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, shell=True)
@@ -26,23 +23,10 @@ except Exception as e:
     print(f"Authentication failed: {e}")

 # download checkpoints
-subprocess.run(
-    [
-        "python",
-        os.path.join(PWD, "cosmos-transfer1", "scripts", "download_checkpoints.py"),
-        "--output_dir",
-        os.path.join(PWD, "cosmos-transfer1", "checkpoints"),
-        "--model",
-        "7b_av",
-    ],
-    shell=True,
-)
-# subprocess.run(
-#     f"python cosmos-transfer1/scripts/download_checkpoints.py \
-#     --hf_token {hf_token} \
-#     --output_dir cosmos-transfer1/checkpoints/ \
-#     --model 7b_av"
-# )
+from download_checkpoints import main as download_checkpoints
+
+os.makedirs("./checkpoints", exist_ok=True)
+download_checkpoints(hf_token="", output_dir="./checkpoints", model="7b_av")

 os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Workaround to suppress MP warning
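
The replacement imports `main` from a top-level `download_checkpoints` module instead of shelling out to the vendored script. The removed call was also subtly broken: passing an argument list together with `shell=True` makes POSIX shells execute only the first element (`"python"`) and drop the remaining arguments, so the script never actually ran with its flags. The new module itself is not among the 50 files shown here, so the sketch below is only a plausible shape for it, assuming it wraps `huggingface_hub.snapshot_download` and keeps a model-name-to-repo table; the repo id used is an assumption, not taken from the commit:

```python
# Hypothetical sketch of download_checkpoints.py; the real module is not shown in this view.
from huggingface_hub import snapshot_download

# Assumed mapping from the model name app.py passes to a Hub repo id.
MODEL_REPOS = {
    "7b_av": "nvidia/Cosmos-Transfer1-7B-Sample-AV",  # assumed repo id
}


def main(hf_token: str = "", output_dir: str = "./checkpoints", model: str = "7b_av") -> None:
    """Download the checkpoint snapshot for `model` into `output_dir`."""
    snapshot_download(
        repo_id=MODEL_REPOS[model],
        local_dir=output_dir,
        token=hf_token or None,  # empty string -> fall back to cached credentials
    )
```

Called with `hf_token=""` as app.py does, such a helper would rely on the authentication attempted earlier in the file (the `Authentication failed` handler visible in the hunk context).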
 
 
cosmos-transfer1/.flake8 DELETED
@@ -1,10 +0,0 @@
-[flake8]
-enable-extensions = G
-select = B,C,E,F,G,P,SIM1,T4,W,B9
-max-line-length = 120
-# C408 ignored because we like the dict keyword argument syntax
-# E501 is not flexible enough, we're using B950 instead
-ignore =
-    E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,E226,E265
-exclude =
-    third_party
cosmos-transfer1/.gitignore DELETED
@@ -1,242 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Misc
-outputs/
-checkpoints/*
-!checkpoints/README.md
-
-# Data types
-*.jit
-*.pt
-*.hdr
-*.webp
-*.pgm
-*.tiff
-*.tif
-*.tar
-*.tar.gz
-*.gz
-*.pkl
-*.pt
-*.bin
-
-# Other uncheckable file types
-*.zip
-*.exe
-*.dll
-*.swp
-*.vscode
-*.DS_Store
-*.pyc
-*Thumbs.db
-*.patch
-
-# Credential information that should never be checked in
-credentials
-*.secret
-
-# ------------------------ BELOW IS AUTO-GENERATED FOR PYTHON REPOS ------------------------
-
-# Byte-compiled / optimized / DLL files
-**/__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-results/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.config
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Third party
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-# For a library or package, you might want to ignore these files since the code is
-# intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# poetry
-# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-# This is especially recommended for binary packages to ensure reproducibility, and is more
-# commonly ignored for libraries.
-# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-
-# pdm
-# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-# in version control.
-# https://pdm.fming.dev/#use-with-ide
-.pdm.toml
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
-# ruff
-.ruff_cache
-
-# PyCharm
-# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-# and can be added to the global gitignore or merged into this file. For a more nuclear
-# option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-CLIP
-.devcontainer/devcontainer.json
-
-# Coverage
-.coverage
-coverage.xml
-
-# JUnit Reports
-report.xml
-
-# CI-CD
-temp/
-envs.txt
-manifest.json
-
-
-# locks and t5 temp files
-*.locks*
-*.no_exist*
-*models--t5*
-
-# OneLogger
-wandb/
-onelogger.err
-onelogger.log
cosmos-transfer1/.pre-commit-config.yaml DELETED
@@ -1,55 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-default_language_version:
-  python: python3.10
-repos:
-  - repo: https://github.com/pycqa/flake8
-    rev: 6.0.0
-    hooks:
-      - id: flake8
-        args:
-          - --max-line-length=120
-          - --ignore=E501,F401,E203,E402,E265,E741,F841,F821,F811,W503,E231,E225,E702
-        exclude: ^dist/|^third_party/
-
-  - repo: https://github.com/psf/black
-    rev: 23.12.1
-    hooks:
-      - id: black
-        args: [--line-length=120]
-        exclude: ^dist/|^third_party/
-
-  - repo: https://github.com/timothycrosley/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-        args: [--line-length=120]
-
-  - repo: https://github.com/MarcoGorelli/absolufy-imports
-    rev: v0.3.1
-    hooks:
-      - id: absolufy-imports
-
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.0.1
-    hooks:
-      - id: trailing-whitespace
-        exclude: ^tests/.*/fixtures/.*
-        args: [--markdown-linebreak-ext=md]
-      - id: end-of-file-fixer
-        exclude: ^tests/.*/fixtures/.*
-      - id: check-added-large-files
-        args: ['--maxkb=2000']
cosmos-transfer1/ATTRIBUTIONS.md DELETED
@@ -1,1661 +0,0 @@
-# Open Source License Attribution
-
-Cosmos uses Open Source components. You can find the details of these open-source projects along with license information below, sorted alphabetically.
-We are grateful to the developers for their contributions to open source and acknowledge these below.
-
-## Better-Profanity - [MIT License](https://github.com/snguyenthanh/better_profanity/blob/master/LICENSE)
-
-```
-
-Copyright (c) 2018 The Python Packaging Authority
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-```
-
-## FFmpeg - [FFMPEG License](https://github.com/FFmpeg/FFmpeg/blob/master/LICENSE.md)
-
-```
-# License
-
-Most files in FFmpeg are under the GNU Lesser General Public License version 2.1
-or later (LGPL v2.1+). Read the file `COPYING.LGPLv2.1` for details. Some other
-files have MIT/X11/BSD-style licenses. In combination the LGPL v2.1+ applies to
-FFmpeg.
-
-Some optional parts of FFmpeg are licensed under the GNU General Public License
-version 2 or later (GPL v2+). See the file `COPYING.GPLv2` for details. None of
-these parts are used by default, you have to explicitly pass `--enable-gpl` to
-configure to activate them. In this case, FFmpeg's license changes to GPL v2+.
-
-Specifically, the GPL parts of FFmpeg are:
-
-- libpostproc
-- optional x86 optimization in the files
-  - `libavcodec/x86/flac_dsp_gpl.asm`
-  - `libavcodec/x86/idct_mmx.c`
-  - `libavfilter/x86/vf_removegrain.asm`
-- the following building and testing tools
-  - `compat/solaris/make_sunver.pl`
-  - `doc/t2h.pm`
-  - `doc/texi2pod.pl`
-  - `libswresample/tests/swresample.c`
-  - `tests/checkasm/*`
-  - `tests/tiny_ssim.c`
-- the following filters in libavfilter:
-  - `signature_lookup.c`
-  - `vf_blackframe.c`
-  - `vf_boxblur.c`
-  - `vf_colormatrix.c`
-  - `vf_cover_rect.c`
-  - `vf_cropdetect.c`
-  - `vf_delogo.c`
-  - `vf_eq.c`
-  - `vf_find_rect.c`
-  - `vf_fspp.c`
-  - `vf_histeq.c`
-  - `vf_hqdn3d.c`
-  - `vf_kerndeint.c`
-  - `vf_lensfun.c` (GPL version 3 or later)
-  - `vf_mcdeint.c`
-  - `vf_mpdecimate.c`
-  - `vf_nnedi.c`
-  - `vf_owdenoise.c`
-  - `vf_perspective.c`
-  - `vf_phase.c`
-  - `vf_pp.c`
-  - `vf_pp7.c`
-  - `vf_pullup.c`
-  - `vf_repeatfields.c`
-  - `vf_sab.c`
-  - `vf_signature.c`
-  - `vf_smartblur.c`
-  - `vf_spp.c`
-  - `vf_stereo3d.c`
-  - `vf_super2xsai.c`
-  - `vf_tinterlace.c`
-  - `vf_uspp.c`
-  - `vf_vaguedenoiser.c`
-  - `vsrc_mptestsrc.c`
-
-Should you, for whatever reason, prefer to use version 3 of the (L)GPL, then
-the configure parameter `--enable-version3` will activate this licensing option
-for you. Read the file `COPYING.LGPLv3` or, if you have enabled GPL parts,
-`COPYING.GPLv3` to learn the exact legal terms that apply in this case.
-
-There are a handful of files under other licensing terms, namely:
-
-* The files `libavcodec/jfdctfst.c`, `libavcodec/jfdctint_template.c` and
-  `libavcodec/jrevdct.c` are taken from libjpeg, see the top of the files for
-  licensing details. Specifically note that you must credit the IJG in the
-  documentation accompanying your program if you only distribute executables.
-  You must also indicate any changes including additions and deletions to
-  those three files in the documentation.
-* `tests/reference.pnm` is under the expat license.
-
-
-## External libraries
-
-FFmpeg can be combined with a number of external libraries, which sometimes
-affect the licensing of binaries resulting from the combination.
-
-### Compatible libraries
-
-The following libraries are under GPL version 2:
-- avisynth
-- frei0r
-- libcdio
-- libdavs2
-- librubberband
-- libvidstab
-- libx264
-- libx265
-- libxavs
-- libxavs2
-- libxvid
-
-When combining them with FFmpeg, FFmpeg needs to be licensed as GPL as well by
-passing `--enable-gpl` to configure.
-
-The following libraries are under LGPL version 3:
-- gmp
-- libaribb24
-- liblensfun
-
-When combining them with FFmpeg, use the configure option `--enable-version3` to
-upgrade FFmpeg to the LGPL v3.
-
-The VMAF, mbedTLS, RK MPI, OpenCORE and VisualOn libraries are under the Apache License
-2.0. That license is incompatible with the LGPL v2.1 and the GPL v2, but not with
-version 3 of those licenses. So to combine these libraries with FFmpeg, the
-license version needs to be upgraded by passing `--enable-version3` to configure.
-
-The smbclient library is under the GPL v3, to combine it with FFmpeg,
-the options `--enable-gpl` and `--enable-version3` have to be passed to
-configure to upgrade FFmpeg to the GPL v3.
-
-### Incompatible libraries
-
-There are certain libraries you can combine with FFmpeg whose licenses are not
-compatible with the GPL and/or the LGPL. If you wish to enable these
-libraries, even in circumstances that their license may be incompatible, pass
-`--enable-nonfree` to configure. This will cause the resulting binary to be
-unredistributable.
-
-The Fraunhofer FDK AAC and OpenSSL libraries are under licenses which are
-incompatible with the GPLv2 and v3. To the best of our knowledge, they are
-compatible with the LGPL.
-
-```
-
-## Hydra-core [MIT License](https://github.com/facebookresearch/hydra/blob/main/LICENSE)
-
-```
-
-MIT License
-
-Copyright (c) Facebook, Inc. and its affiliates.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-```
-
-## ImageIo - [BSD 2-Clause "Simplified" License](https://github.com/imageio/imageio/blob/master/LICENSE)
-
-```
-
-Copyright (c) 2014-2022, imageio developers
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-```
-
-## Iopath - [MIT License](https://github.com/facebookresearch/iopath/blob/main/LICENSE)
-
-```
-MIT License
-
-Copyright (c) Facebook, Inc. and its affiliates.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-```
-
-## Llama-Guard-3-8B [META LLAMA 3 COMMUNITY LICENSE](https://github.com/meta-llama/llama3/blob/main/LICENSE)
-
-```
-
-META LLAMA 3 COMMUNITY LICENSE AGREEMENT
-
-Meta Llama 3 Version Release Date: April 18, 2024
-
-“Agreement” means the terms and conditions for use, reproduction, distribution, and
-modification of the Llama Materials set forth herein.
-
-“Documentation” means the specifications, manuals, and documentation accompanying Meta
-Llama 3 distributed by Meta at https://llama.meta.com/get-started/.
-
-“Licensee” or “you” means you, or your employer or any other person or entity (if you are
-entering into this Agreement on such person or entity’s behalf), of the age required under
-applicable laws, rules, or regulations to provide legal consent and that has legal authority
-to bind your employer or such other person or entity if you are entering into this Agreement
-on their behalf.
-
-“Meta Llama 3” means the foundational large language models and software and algorithms,
-including machine-learning model code, trained model weights, inference-enabling code,
-training-enabling code, fine-tuning-enabling code, and other elements of the foregoing
-distributed by Meta at https://llama.meta.com/llama-downloads.
-
-“Llama Materials” means, collectively, Meta’s proprietary Meta Llama 3 and Documentation
-(and any portion thereof) made available under this Agreement.
-
-“Meta” or “we” means Meta Platforms Ireland Limited (if you are located in or, if you are
-an entity, your principal place of business is in the EEA or Switzerland) and Meta Platforms,
-Inc. (if you are located outside of the EEA or Switzerland).
-
-By clicking “I Accept” below or by using or distributing any portion or element of the Llama
-Materials, you agree to be bound by this Agreement.
-
-1. License Rights and Redistribution.
-
-a. Grant of Rights. You are granted a non-exclusive, worldwide, non-transferable and
-royalty-free limited license under Meta’s intellectual property or other rights owned by
-Meta embodied in the Llama Materials to use, reproduce, distribute, copy, create derivative
-works of, and make modifications to the Llama Materials.
-
-b. Redistribution and Use.
-i. If you distribute or make available the Llama Materials (or any derivative works
-thereof), or a product or service that uses any of them, including another AI model, you
-shall (A) provide a copy of this Agreement with any such Llama Materials; and (B)
-prominently display “Built with Meta Llama 3” on a related website, user interface,
-blogpost, about page, or product documentation. If you use the Llama Materials to create,
-train, fine tune, or otherwise improve an AI model, which is distributed or made available,
-you shall also include “Llama 3” at the beginning of any such AI model name.
-
-ii. If you receive Llama Materials, or any derivative works thereof, from a Licensee as
-part of an integrated end user product, then Section 2 of this Agreement will not apply
-to you.
-
-iii. You must retain in all copies of the Llama Materials that you distribute the
-following attribution notice within a “Notice” text file distributed as a part of such
-copies: “Meta Llama 3 is licensed under the Meta Llama 3 Community License, Copyright ©
-Meta Platforms, Inc. All Rights Reserved.”
-
-iv. Your use of the Llama Materials must comply with applicable laws and regulations
-(including trade compliance laws and regulations) and adhere to the Acceptable Use Policy
-for the Llama Materials (available at https://llama.meta.com/llama3/use-policy), which
-is hereby incorporated by reference into this Agreement.
-
-v. You will not use the Llama Materials or any output or results of the Llama Materials
-to improve any other large language model (excluding Meta Llama 3 or derivative works
-thereof).
-
-2. Additional Commercial Terms.
-
-If, on the Meta Llama 3 version release date, the monthly active users of the products or
-services made available by or for Licensee, or Licensee’s affiliates, is greater than 700
-million monthly active users in the preceding calendar month, you must request a license
-from Meta, which Meta may grant to you in its sole discretion, and you are not authorized
-to exercise any of the rights under this Agreement unless or until Meta otherwise expressly
-grants you such rights.
-
-3. Disclaimer of Warranty.
-
-UNLESS REQUIRED BY APPLICABLE LAW, THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM
-ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, AND META DISCLAIMS ALL
-WARRANTIES OF ANY KIND, BOTH EXPRESS AND IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY
-WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
-YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING
-THE LLAMA MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE LLAMA MATERIALS
-AND ANY OUTPUT AND RESULTS.
-
-4. Limitation of Liability.
-
-IN NO EVENT WILL META OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT,
-FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR
-PUNITIVE DAMAGES, EVEN IF META OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY
-OF THE FOREGOING.
-
-5. Intellectual Property.
-
-a. No trademark licenses are granted under this Agreement, and in connection with the Llama
-Materials, neither Meta nor Licensee may use any name or mark owned by or associated with
-the other or any of its affiliates, except as required for reasonable and customary use in
-describing and redistributing the Llama Materials or as set forth in this Section 5(a).
-Meta hereby grants you a license to use “Llama 3” (the “Mark”) solely as required to comply
-with the last sentence of Section 1.b.i. You will comply with Meta’s brand guidelines
-(currently accessible at https://about.meta.com/brand/resources/meta/company-brand/).
-All goodwill arising out of your use of the Mark will inure to the benefit of Meta.
-
-b. Subject to Meta’s ownership of Llama Materials and derivatives made by or for Meta, with
-respect to any derivative works and modifications of the Llama Materials that are made by
-you, as between you and Meta, you are and will be the owner of such derivative works and
-modifications.
-
-c. If you institute litigation or other proceedings against Meta or any entity (including a
-cross-claim or counterclaim in a lawsuit) alleging that the Llama Materials or Meta Llama 3
-outputs or results, or any portion of any of the foregoing, constitutes infringement of
-intellectual property or other rights owned or licensable by you, then any licenses granted
-to you under this Agreement shall terminate as of the date such litigation or claim is filed
-or instituted. You will indemnify and hold harmless Meta from and against any claim by any
-third party arising out of or related to your use or distribution of the Llama Materials.
-
-6. Term and Termination.
-
-The term of this Agreement will commence upon your acceptance of this Agreement or access
-to the Llama Materials and will continue in full force and effect until terminated in
-accordance with the terms and conditions herein. Meta may terminate this Agreement if you
-are in breach of any term or condition of this Agreement. Upon termination of this Agreement,
-you shall delete and cease use of the Llama Materials. Sections 3, 4, and 7 shall survive
-the termination of this Agreement.
-
-7. Governing Law and Jurisdiction.
-
-This Agreement will be governed and construed under the laws of the State of California
-without regard to choice of law principles, and the UN Convention on Contracts for the
-International Sale of Goods does not apply to this Agreement. The courts of California
-shall have exclusive jurisdiction of any dispute arising out of this Agreement.
-
-META LLAMA 3 ACCEPTABLE USE POLICY
-
-Meta is committed to promoting safe and fair use of its tools and features, including Meta
-Llama 3. If you access or use Meta Llama 3, you agree to this Acceptable Use Policy
-(“Policy”). The most recent copy of this policy can be found at
-https://llama.meta.com/llama3/use-policy.
-
-Prohibited Uses
-
-We want everyone to use Meta Llama 3 safely and responsibly. You agree you will not use, or
-allow others to use, Meta Llama 3 to:
-
-1. Violate the law or others’ rights, including to:
-
-a. Engage in, promote, generate, contribute to, encourage, plan, incite, or further illegal
-or unlawful activity or content, such as:
-
-i. Violence or terrorism
-ii. Exploitation or harm to children, including the solicitation, creation, acquisition,
-or dissemination of child exploitative content or failure to report Child Sexual Abuse
-Material
-iii. Human trafficking, exploitation, and sexual violence
-iv. The illegal distribution of information or materials to minors, including obscene
-materials, or failure to employ legally required age-gating in connection with such
-information or materials
-v. Sexual solicitation
-vi. Any other criminal activity
-
-b. Engage in, promote, incite, or facilitate the harassment, abuse, threatening, or
-bullying of individuals or groups of individuals
-
-c. Engage in, promote, incite, or facilitate discrimination or other unlawful or harmful
-conduct in the provision of employment, employment benefits, credit, housing, other economic
-benefits, or other essential goods and services
-
-d. Engage in the unauthorized or unlicensed practice of any profession including, but not
-limited to, financial, legal, medical/health, or related professional practices
-
-e. Collect, process, disclose, generate, or infer health, demographic, or other sensitive
-personal or private information about individuals without rights and consents required by
-applicable laws
-
-f. Engage in or facilitate any action or generate any content that infringes, misappropriates,
-or otherwise violates any third-party rights, including the outputs or results of any
-products or services using the Llama Materials
-
-g. Create, generate, or facilitate the creation of malicious code, malware, computer viruses
-or do anything else that could disable, overburden, interfere with or impair the proper
-working, integrity, operation, or appearance of a website or computer system
-
-2. Engage in, promote, incite, facilitate, or assist in the planning or development of
-activities that present a risk of death or bodily harm to individuals, including use of Meta
-Llama 3 related to the following:
-
-a. Military, warfare, nuclear industries or applications, espionage, use for materials or
-activities that are subject to the International Traffic Arms Regulations (ITAR) maintained
-by the United States Department of State
-b. Guns and illegal weapons (including weapon development)
-c. Illegal drugs and regulated/controlled substances
-d. Operation of critical infrastructure, transportation technologies, or heavy machinery
-e. Self-harm or harm to others, including suicide, cutting, and eating disorders
-f. Any content intended to incite or promote violence, abuse, or any infliction of bodily
-harm to an individual
-
-3. Intentionally deceive or mislead others, including use of Meta Llama 3 related to the
-following:
-
-a. Generating, promoting, or furthering fraud or the creation or promotion of disinformation
-b. Generating, promoting, or furthering defamatory content, including the creation of
-defamatory statements, images, or other content
-c. Generating, promoting, or further distributing spam
-d. Impersonating another individual without consent, authorization, or legal right
-e. Representing that the use of Meta Llama 3 or outputs are human-generated
-f. Generating or facilitating false online engagement, including fake reviews and other
-means of fake online engagement
-g. Fail to appropriately disclose to end users any known dangers of your AI system
-
-Please report any violation of this Policy, software “bug,” or other problems that could
-lead to a violation of this Policy through one of the following means:
-
-* Reporting issues with the model: https://github.com/meta-llama/llama3
-* Reporting risky content generated by the model: developers.facebook.com/llama_output_feedback
-* Reporting bugs and security concerns: facebook.com/whitehat/info
-* Reporting violations of the Acceptable Use Policy or unlicensed uses of Meta Llama 3:
-LlamaUseReport@meta.com
-
-```
-
-## Loguru - [MIT License](https://github.com/Delgan/loguru/blob/master/LICENSE)
-
-```
-
-MIT License
-
-Copyright (c) 2017
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-```
-
-## Mediapy - [Apache License 2.0](https://github.com/google/mediapy/blob/main/LICENSE)
-
-```
-
-Apache License
-Version 2.0, January 2004
-http://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
-"License" shall mean the terms and conditions for use, reproduction,
-and distribution as defined by Sections 1 through 9 of this document.
-
-"Licensor" shall mean the copyright owner or entity authorized by
-the copyright owner that is granting the License.
-
-"Legal Entity" shall mean the union of the acting entity and all
-other entities that control, are controlled by, or are under common
-control with that entity. For the purposes of this definition,
-"control" means (i) the power, direct or indirect, to cause the
-direction or management of such entity, whether by contract or
-otherwise, or (ii) ownership of fifty percent (50%) or more of the
-outstanding shares, or (iii) beneficial ownership of such entity.
-
-"You" (or "Your") shall mean an individual or Legal Entity
-exercising permissions granted by this License.
-
-"Source" form shall mean the preferred form for making modifications,
-including but not limited to software source code, documentation
-source, and configuration files.
-
-"Object" form shall mean any form resulting from mechanical
-transformation or translation of a Source form, including but
-not limited to compiled object code, generated documentation,
-and conversions to other media types.
-
-"Work" shall mean the work of authorship, whether in Source or
-Object form, made available under the License, as indicated by a
-copyright notice that is included in or attached to the work
-(an example is provided in the Appendix below).
-
-"Derivative Works" shall mean any work, whether in Source or Object
-form, that is based on (or derived from) the Work and for which the
-editorial revisions, annotations, elaborations, or other modifications
-represent, as a whole, an original work of authorship. For the purposes
-of this License, Derivative Works shall not include works that remain
-separable from, or merely link (or bind by name) to the interfaces of,
-the Work and Derivative Works thereof.
-
-"Contribution" shall mean any work of authorship, including
-the original version of the Work and any modifications or additions
-to that Work or Derivative Works thereof, that is intentionally
-submitted to Licensor for inclusion in the Work by the copyright owner
-or by an individual or Legal Entity authorized to submit on behalf of
-the copyright owner. For the purposes of this definition, "submitted"
-means any form of electronic, verbal, or written communication sent
-to the Licensor or its representatives, including but not limited to
-communication on electronic mailing lists, source code control systems,
-and issue tracking systems that are managed by, or on behalf of, the
-Licensor for the purpose of discussing and improving the Work, but
-excluding communication that is conspicuously marked or otherwise
-designated in writing by the copyright owner as "Not a Contribution."
-
-"Contributor" shall mean Licensor and any individual or Legal Entity
-on behalf of whom a Contribution has been received by Licensor and
-subsequently incorporated within the Work.
-
-2. Grant of Copyright License. Subject to the terms and conditions of
-this License, each Contributor hereby grants to You a perpetual,
-worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-copyright license to reproduce, prepare Derivative Works of,
-publicly display, publicly perform, sublicense, and distribute the
-Work and such Derivative Works in Source or Object form.
-
-3. Grant of Patent License. Subject to the terms and conditions of
-this License, each Contributor hereby grants to You a perpetual,
-worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-(except as stated in this section) patent license to make, have made,
-use, offer to sell, sell, import, and otherwise transfer the Work,
-where such license applies only to those patent claims licensable
-by such Contributor that are necessarily infringed by their
-Contribution(s) alone or by combination of their Contribution(s)
-with the Work to which such Contribution(s) was submitted. If You
-institute patent litigation against any entity (including a
-cross-claim or counterclaim in a lawsuit) alleging that the Work
-or a Contribution incorporated within the Work constitutes direct
-or contributory patent infringement, then any patent licenses
-granted to You under this License for that Work shall terminate
-as of the date such litigation is filed.
-
-4. Redistribution. You may reproduce and distribute copies of the
-Work or Derivative Works thereof in any medium, with or without
-modifications, and in Source or Object form, provided that You
-meet the following conditions:
-
-(a) You must give any other recipients of the Work or
-Derivative Works a copy of this License; and
-
-(b) You must cause any modified files to carry prominent notices
-stating that You changed the files; and
-
-(c) You must retain, in the Source form of any Derivative Works
-that You distribute, all copyright, patent, trademark, and
-attribution notices from the Source form of the Work,
-excluding those notices that do not pertain to any part of
-the Derivative Works; and
-
-(d) If the Work includes a "NOTICE" text file as part of its
-distribution, then any Derivative Works that You distribute must
-include a readable copy of the attribution notices contained
-within such NOTICE file, excluding those notices that do not
-pertain to any part of the Derivative Works, in at least one
-of the following places: within a NOTICE text file distributed
-as part of the Derivative Works; within the Source form or
-documentation, if provided along with the Derivative Works; or,
-within a display generated by the Derivative Works, if and
-wherever such third-party notices normally appear. The contents
-of the NOTICE file are for informational purposes only and
-do not modify the License. You may add Your own attribution
-notices within Derivative Works that You distribute, alongside
-or as an addendum to the NOTICE text from the Work, provided
-that such additional attribution notices cannot be construed
-as modifying the License.
-
-You may add Your own copyright statement to Your modifications and
-may provide additional or different license terms and conditions
-for use, reproduction, or distribution of Your modifications, or
-for any such Derivative Works as a whole, provided Your use,
-reproduction, and distribution of the Work otherwise complies with
-the conditions stated in this License.
-
-5. Submission of Contributions. Unless You explicitly state otherwise,
-any Contribution intentionally submitted for inclusion in the Work
-by You to the Licensor shall be under the terms and conditions of
-this License, without any additional terms or conditions.
-Notwithstanding the above, nothing herein shall supersede or modify
-the terms of any separate license agreement you may have executed
-with Licensor regarding such Contributions.
-
-6. Trademarks. This License does not grant permission to use the trade
-names, trademarks, service marks, or product names of the Licensor,
-except as required for reasonable and customary use in describing the
-origin of the Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or
-agreed to in writing, Licensor provides the Work (and each
-Contributor provides its Contributions) on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-implied, including, without limitation, any warranties or conditions
-of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-PARTICULAR PURPOSE. You are solely responsible for determining the
-appropriateness of using or redistributing the Work and assume any
-risks associated with Your exercise of permissions under this License.
-
-8. Limitation of Liability. In no event and under no legal theory,
-whether in tort (including negligence), contract, or otherwise,
-unless required by applicable law (such as deliberate and grossly
-negligent acts) or agreed to in writing, shall any Contributor be
-liable to You for damages, including any direct, indirect, special,
-incidental, or consequential damages of any character arising as a
-result of this License or out of the use or inability to use the
-Work (including but not limited to damages for loss of goodwill,
-work stoppage, computer failure or malfunction, or any and all
-other commercial damages or losses), even if such Contributor
-has been advised of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability. While redistributing
-the Work or Derivative Works thereof, You may choose to offer,
-and charge a fee for, acceptance of support, warranty, indemnity,
-or other liability obligations and/or rights consistent with this
-License. However, in accepting such obligations, You may act only
-on Your own behalf and on Your sole responsibility, not on behalf
-of any other Contributor, and only if You agree to indemnify,
-defend, and hold each Contributor harmless for any liability
-incurred by, or claims asserted against, such Contributor by reason
-of your accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS
-
-APPENDIX: How to apply the Apache License to your work.
-
-To apply the Apache License to your work, attach the following
-boilerplate notice, with the fields enclosed by brackets "[]"
-replaced with your own identifying information. (Don't include
-the brackets!) The text should be enclosed in the appropriate
-comment syntax for the file format. We also recommend that a
-file or class name and description of purpose be included on the
-same "printed page" as the copyright notice for easier
-identification within third-party archives.
-
-Copyright [yyyy] [name of copyright owner]
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-```
-
-## Nltk - [Apache License 2.0](https://github.com/nltk/nltk/blob/develop/LICENSE.txt)
-
-```
-
-Apache License
-Version 2.0, January 2004
-http://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
-"License" shall mean the terms and conditions for use, reproduction,
-and distribution as defined by Sections 1 through 9 of this document.
-
-"Licensor" shall mean the copyright owner or entity authorized by
-the copyright owner that is granting the License.
-
-"Legal Entity" shall mean the union of the acting entity and all
-other entities that control, are controlled by, or are under common
-control with that entity. For the purposes of this definition,
-"control" means (i) the power, direct or indirect, to cause the
-direction or management of such entity, whether by contract or
-otherwise, or (ii) ownership of fifty percent (50%) or more of the
-outstanding shares, or (iii) beneficial ownership of such entity.
-
-"You" (or "Your") shall mean an individual or Legal Entity
-exercising permissions granted by this License.
-
-"Source" form shall mean the preferred form for making modifications,
-including but not limited to software source code, documentation
-source, and configuration files.
-
-"Object" form shall mean any form resulting from mechanical
-transformation or translation of a Source form, including but
-not limited to compiled object code, generated documentation,
-and conversions to other media types.
-
-"Work" shall mean the work of authorship, whether in Source or
-Object form, made available under the License, as indicated by a
-copyright notice that is included in or attached to the work
-(an example is provided in the Appendix below).
-
-"Derivative Works" shall mean any work, whether in Source or Object
-form, that is based on (or derived from) the Work and for which the
-editorial revisions, annotations, elaborations, or other modifications
-represent, as a whole, an original work of authorship. For the purposes
-of this License, Derivative Works shall not include works that remain
-separable from, or merely link (or bind by name) to the interfaces of,
-the Work and Derivative Works thereof.
-
-"Contribution" shall mean any work of authorship, including
-the original version of the Work and any modifications or additions
-to that Work or Derivative Works thereof, that is intentionally
-submitted to Licensor for inclusion in the Work by the copyright owner
-or by an individual or Legal Entity authorized to submit on behalf of
-the copyright owner. For the purposes of this definition, "submitted"
-means any form of electronic, verbal, or written communication sent
-to the Licensor or its representatives, including but not limited to
-communication on electronic mailing lists, source code control systems,
-and issue tracking systems that are managed by, or on behalf of, the
-Licensor for the purpose of discussing and improving the Work, but
-excluding communication that is conspicuously marked or otherwise
-designated in writing by the copyright owner as "Not a Contribution."
-
-"Contributor" shall mean Licensor and any individual or Legal Entity
-on behalf of whom a Contribution has been received by Licensor and
-subsequently incorporated within the Work.
-
-2. Grant of Copyright License. Subject to the terms and conditions of
-this License, each Contributor hereby grants to You a perpetual,
-worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-copyright license to reproduce, prepare Derivative Works of,
-publicly display, publicly perform, sublicense, and distribute the
-Work and such Derivative Works in Source or Object form.
-
-3. Grant of Patent License. Subject to the terms and conditions of
-this License, each Contributor hereby grants to You a perpetual,
-worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-(except as stated in this section) patent license to make, have made,
-use, offer to sell, sell, import, and otherwise transfer the Work,
-where such license applies only to those patent claims licensable
-by such Contributor that are necessarily infringed by their
-Contribution(s) alone or by combination of their Contribution(s)
-with the Work to which such Contribution(s) was submitted. If You
-institute patent litigation against any entity (including a
-cross-claim or counterclaim in a lawsuit) alleging that the Work
-or a Contribution incorporated within the Work constitutes direct
-or contributory patent infringement, then any patent licenses
-granted to You under this License for that Work shall terminate
-as of the date such litigation is filed.
-
-4. Redistribution. You may reproduce and distribute copies of the
-Work or Derivative Works thereof in any medium, with or without
-modifications, and in Source or Object form, provided that You
-meet the following conditions:
-
-(a) You must give any other recipients of the Work or
-Derivative Works a copy of this License; and
-
-(b) You must cause any modified files to carry prominent notices
-stating that You changed the files; and
-
-(c) You must retain, in the Source form of any Derivative Works
-that You distribute, all copyright, patent, trademark, and
-attribution notices from the Source form of the Work,
-excluding those notices that do not pertain to any part of
-the Derivative Works; and
-
-(d) If the Work includes a "NOTICE" text file as part of its
-distribution, then any Derivative Works that You distribute must
-include a readable copy of the attribution notices contained
-within such NOTICE file, excluding those notices that do not
-pertain to any part of the Derivative Works, in at least one
-of the following places: within a NOTICE text file distributed
-as part of the Derivative Works; within the Source form or
-documentation, if provided along with the Derivative Works; or,
-within a display generated by the Derivative Works, if and
-wherever such third-party notices normally appear. The contents
-of the NOTICE file are for informational purposes only and
-do not modify the License. You may add Your own attribution
-notices within Derivative Works that You distribute, alongside
-or as an addendum to the NOTICE text from the Work, provided
-that such additional attribution notices cannot be construed
-as modifying the License.
-
-You may add Your own copyright statement to Your modifications and
-may provide additional or different license terms and conditions
-for use, reproduction, or distribution of Your modifications, or
-for any such Derivative Works as a whole, provided Your use,
-reproduction, and distribution of the Work otherwise complies with
-the conditions stated in this License.
-
-5. Submission of Contributions. Unless You explicitly state otherwise,
-any Contribution intentionally submitted for inclusion in the Work
-by You to the Licensor shall be under the terms and conditions of
-this License, without any additional terms or conditions.
-Notwithstanding the above, nothing herein shall supersede or modify
-the terms of any separate license agreement you may have executed
-with Licensor regarding such Contributions.
-
-6. Trademarks. This License does not grant permission to use the trade
-names, trademarks, service marks, or product names of the Licensor,
-except as required for reasonable and customary use in describing the
-origin of the Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or
-agreed to in writing, Licensor provides the Work (and each
-Contributor provides its Contributions) on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-implied, including, without limitation, any warranties or conditions
863
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
864
- PARTICULAR PURPOSE. You are solely responsible for determining the
865
- appropriateness of using or redistributing the Work and assume any
866
- risks associated with Your exercise of permissions under this License.
867
-
868
- 8. Limitation of Liability. In no event and under no legal theory,
869
- whether in tort (including negligence), contract, or otherwise,
870
- unless required by applicable law (such as deliberate and grossly
871
- negligent acts) or agreed to in writing, shall any Contributor be
872
- liable to You for damages, including any direct, indirect, special,
873
- incidental, or consequential damages of any character arising as a
874
- result of this License or out of the use or inability to use the
875
- Work (including but not limited to damages for loss of goodwill,
876
- work stoppage, computer failure or malfunction, or any and all
877
- other commercial damages or losses), even if such Contributor
878
- has been advised of the possibility of such damages.
879
-
880
- 9. Accepting Warranty or Additional Liability. While redistributing
881
- the Work or Derivative Works thereof, You may choose to offer,
882
- and charge a fee for, acceptance of support, warranty, indemnity,
883
- or other liability obligations and/or rights consistent with this
884
- License. However, in accepting such obligations, You may act only
885
- on Your own behalf and on Your sole responsibility, not on behalf
886
- of any other Contributor, and only if You agree to indemnify,
887
- defend, and hold each Contributor harmless for any liability
888
- incurred by, or claims asserted against, such Contributor by reason
889
- of your accepting any such warranty or additional liability.
890
-
891
- END OF TERMS AND CONDITIONS
892
-
893
- APPENDIX: How to apply the Apache License to your work.
894
-
895
- To apply the Apache License to your work, attach the following
896
- boilerplate notice, with the fields enclosed by brackets "[]"
897
- replaced with your own identifying information. (Don't include
898
- the brackets!) The text should be enclosed in the appropriate
899
- comment syntax for the file format. We also recommend that a
900
- file or class name and description of purpose be included on the
901
- same "printed page" as the copyright notice for easier
902
- identification within third-party archives.
903
-
904
- Copyright [yyyy] [name of copyright owner]
905
-
906
- Licensed under the Apache License, Version 2.0 (the "License");
907
- you may not use this file except in compliance with the License.
908
- You may obtain a copy of the License at
909
-
910
- http://www.apache.org/licenses/LICENSE-2.0
911
-
912
- Unless required by applicable law or agreed to in writing, software
913
- distributed under the License is distributed on an "AS IS" BASIS,
914
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
915
- See the License for the specific language governing permissions and
916
- limitations under the License.
917
-
918
- ```
919
-
- ## PEFT - [Apache License 2.0](https://github.com/huggingface/peft/blob/main/LICENSE)
-
- ```
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
- ```
-
- ## Pillow - [MIT License](https://github.com/python-pillow/Pillow/blob/main/LICENSE)
-
- ```
-
- The Python Imaging Library (PIL) is
-
- Copyright © 1997-2011 by Secret Labs AB
- Copyright © 1995-2011 by Fredrik Lundh and contributors
-
- Pillow is the friendly PIL fork. It is
-
- Copyright © 2010 by Jeffrey A. Clark and contributors
-
- Like PIL, Pillow is licensed under the open source MIT-CMU License:
-
- By obtaining, using, and/or copying this software and/or its associated
- documentation, you agree that you have read, understood, and will comply
- with the following terms and conditions:
-
- Permission to use, copy, modify and distribute this software and its
- documentation for any purpose and without fee is hereby granted,
- provided that the above copyright notice appears in all copies, and that
- both that copyright notice and this permission notice appear in supporting
- documentation, and that the name of Secret Labs AB or the author not be
- used in advertising or publicity pertaining to distribution of the software
- without specific, written prior permission.
-
- SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
- SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
- IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL,
- INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
- OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- PERFORMANCE OF THIS SOFTWARE.
-
- ```
-
- ## PyAV - [BSD 3-Clause "New" or "Revised" License](https://github.com/PyAV-Org/PyAV/blob/main/LICENSE.txt)
-
- ```
-
- Copyright retained by original committers. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the project nor the names of its contributors may be
- used to endorse or promote products derived from this software without
- specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT,
- INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ```
-
- ## Pytorch_Retinaface - [MIT License](https://github.com/biubug6/Pytorch_Retinaface/blob/master/LICENSE.MIT)
-
- ```
- MIT License
-
- Copyright (c) 2019
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
- ```
-
- ## Sentencepiece - [Apache License 2.0](https://github.com/google/sentencepiece/blob/master/LICENSE)
-
- ```
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
- ```
-
- ## Termcolor - [MIT License](https://github.com/termcolor/termcolor/blob/main/COPYING.txt)
-
- ```
- Copyright (c) 2008-2011 Volvox Development Team
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- ```
-
- ## Transformers - [Apache License 2.0](https://github.com/huggingface/transformers/blob/main/LICENSE)
-
- ```
-
- Copyright 2018- The Hugging Face team. All rights reserved.
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
- ```
cosmos-transfer1/CONTRIBUTING.md DELETED
@@ -1,51 +0,0 @@
- # How to Contribute
-
- We'd love to receive your patches and contributions. Please keep your PRs as drafts until you would like us to review them.
-
- ## Code Reviews
-
- All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult
- [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests.
-
- ## Signing Your Work
-
- * We require that all contributors "sign off" on their commits. This certifies that the contribution is your original work, or that you have the right to submit it under the same license or a compatible license.
-
- * Any contribution that contains commits that are not signed off will not be accepted.
-
- * To sign off on a commit, use the `--signoff` (or `-s`) option when committing your changes:
- ```bash
- $ git commit -s -m "Add cool feature."
- ```
- This will append the following to your commit message:
- ```
- Signed-off-by: Your Name <[email protected]>
- ```
-
- * Full text of the DCO:
-
- ```
- Developer Certificate of Origin
- Version 1.1
-
- Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
- 1 Letterman Drive
- Suite D4700
- San Francisco, CA, 94129
-
- Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
- ```
-
- ```
- Developer's Certificate of Origin 1.1
-
- By making a contribution to this project, I certify that:
-
- (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or
-
- (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or
-
- (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.
-
- (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
- ```
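The sign-off requirement described in the deleted CONTRIBUTING.md above can also be satisfied retroactively. The following is a minimal sketch using standard git commands; the `main` base branch, `origin` remote, and `my-feature-branch` name are illustrative placeholders, not part of the original document:

```bash
# Sign off the most recent commit in place, keeping its message.
git commit --amend --signoff --no-edit

# Sign off every commit on the current branch since it diverged from main.
git rebase --signoff main

# Rewriting history requires a force-push to update the PR branch.
git push --force-with-lease origin my-feature-branch
```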
cosmos-transfer1/Dockerfile DELETED
@@ -1,47 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- # Use NVIDIA PyTorch container as base image
- FROM nvcr.io/nvidia/tritonserver:25.04-vllm-python-py3
-
- # Install basic tools
- RUN apt-get update && apt-get install -y git tree ffmpeg wget
- RUN rm /bin/sh && ln -s /bin/bash /bin/sh && ln -s /lib64/libcuda.so.1 /lib64/libcuda.so
- RUN apt-get install -y libglib2.0-0
-
- # Copy the cosmos-transfer1.yaml and requirements.txt files to the container
- COPY ./cosmos-transfer1.yaml /cosmos-transfer1.yaml
- COPY ./requirements_docker.txt /requirements.txt
-
- RUN ls -l /usr/lib/python3/dist-packages/blinker-1.7.0.dist-info && rm -rf /usr/lib/python3/dist-packages/blinker-1.7.0.dist-info
- RUN echo "Installing dependencies. This will take a while..." && \
- pip install --no-cache-dir -r /requirements.txt && \
- pip install -v --upgrade --no-build-isolation --no-dependencies sam2==1.1.0 && \
- pip install transformer-engine[pytorch] && \
- pip install decord==0.6.0 && \
- git clone https://github.com/NVIDIA/apex && \
- pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./apex && \
- rm -rf apex && \
- pip install -v decord==0.6.0 && \
- echo "Environment setup complete"
-
- # Create Python symlink
- RUN ln -s /usr/bin/python3.12 /usr/bin/python
- RUN apt-get install -y libmagic1
-
- RUN mkdir -p /workspace
- WORKDIR /workspace
-
- CMD ["/bin/bash"]
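For reference, an image built from the Dockerfile above is typically built and entered roughly as follows. This is a sketch rather than a documented entry point: the tag mirrors the build command in the deleted INSTALL.md, the `/workspace` mount matches the `WORKDIR` set above, and `--gpus all` assumes the NVIDIA Container Toolkit is installed:

```bash
# Build the image from the repository root.
docker build -f Dockerfile . -t nvcr.io/$USER/cosmos-transfer1:latest

# Start an interactive shell with GPU access, mounting the repo at /workspace
# (the working directory configured in the Dockerfile).
docker run --gpus all -it --rm \
  -v "$(pwd)":/workspace \
  nvcr.io/$USER/cosmos-transfer1:latest
```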
cosmos-transfer1/INSTALL.md DELETED
@@ -1,88 +0,0 @@
1
- ## Environment setup
2
-
3
- Clone the `cosmos-transfer1` source code
4
- ```bash
5
- git clone [email protected]:nvidia-cosmos/cosmos-transfer1.git
6
- cd cosmos-transfer1
7
- git submodule update --init --recursive
8
- ```
9
-
10
- Cosmos runs only on Linux systems. We have tested the installation with Ubuntu 24.04, 22.04, and 20.04.
11
- Cosmos requires Python `3.12.x`.
12
-
13
- ### Inference using conda
14
-
15
- Please also make sure you have `conda` installed ([instructions](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html)).
16
-
17
- The below commands create the `cosmos-transfer1` conda environment and install the dependencies for inference:
18
- ```bash
19
- # Create the cosmos-transfer1 conda environment.
20
- conda env create --file cosmos-transfer1.yaml
21
- # Activate the cosmos-transfer1 conda environment.
22
- conda activate cosmos-transfer1
23
- # Install the dependencies.
24
- pip install -r requirements.txt
25
- # Install vllm
26
- pip install https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl
27
- export VLLM_ATTENTION_BACKEND=FLASHINFER
28
- pip install vllm==0.9.0
29
- # Install decord
30
- pip install decord==0.6.0
31
- # Patch Transformer engine linking issues in conda environments.
32
- ln -sf $CONDA_PREFIX/lib/python3.12/site-packages/nvidia/*/include/* $CONDA_PREFIX/include/
33
- ln -sf $CONDA_PREFIX/lib/python3.12/site-packages/nvidia/*/include/* $CONDA_PREFIX/include/python3.12
34
- # Install Transformer engine.
35
- pip install transformer-engine[pytorch]
36
- ```
37
-
38
- To test the environment setup for inference, run:
39
- ```bash
40
- PYTHONPATH=$(pwd) python scripts/test_environment.py
41
- ```
42
-
43
- ### Inference using docker
44
-
45
- If you prefer a containerized environment, you can build and run this repo's Dockerfile to get an environment with all the packages pre-installed. Because this environment does not use conda, there is no need to specify `CUDA_HOME=$CONDA_PREFIX` when invoking this repo's scripts.
46
-
47
- This requires Docker to be present on your system, with the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed.
48
-
49
- ```bash
50
- docker build -f Dockerfile . -t nvcr.io/$USER/cosmos-transfer1:latest
51
- ```
52
-
53
- Note: If you encounter permission issues while mounting local files inside the Docker container, you can make the folders in your current directory writable by all users (including Docker) using this helpful alias
54
- ```bash
55
- alias share='sudo chown -R ${USER}:users $PWD && sudo chmod g+w $PWD'
56
- ```
57
- before running the container.
58
-
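- Once the image is built, you can start a container with GPU access. The following is a sketch: the tag matches the build command above, but the mount point and flags are assumptions that depend on your setup.
- ```bash
- # Run the image with all GPUs visible and the current repo mounted at /workspace
- docker run --gpus all -it --rm -v $(pwd):/workspace nvcr.io/$USER/cosmos-transfer1:latest
- ```
- 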
59
- ### Training
60
-
61
- The below commands create the `cosmos-transfer1` conda environment and install the dependencies for training. The setup is the same as for inference, plus Apex for full bfloat16 training.
62
- ```bash
63
- # Create the cosmos-transfer1 conda environment.
64
- conda env create --file cosmos-transfer1.yaml
65
- # Activate the cosmos-transfer1 conda environment.
66
- conda activate cosmos-transfer1
67
- # Install the dependencies.
68
- pip install -r requirements.txt
69
- # Install vllm
70
- pip install https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl
71
- export VLLM_ATTENTION_BACKEND=FLASHINFER
72
- pip install vllm==0.9.0
73
- # Install decord
74
- pip install decord==0.6.0
75
- # Patch Transformer engine linking issues in conda environments.
76
- ln -sf $CONDA_PREFIX/lib/python3.12/site-packages/nvidia/*/include/* $CONDA_PREFIX/include/
77
- ln -sf $CONDA_PREFIX/lib/python3.12/site-packages/nvidia/*/include/* $CONDA_PREFIX/include/python3.12
78
- # Install Transformer engine.
79
- pip install transformer-engine[pytorch]
80
- # Install Apex for full training with bfloat16.
81
- git clone https://github.com/NVIDIA/apex
82
- pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./apex
83
- ```
84
-
85
- You can test the environment setup for post-training with
86
- ```bash
87
- PYTHONPATH=$(pwd) python scripts/test_environment.py --training
88
- ```
 
cosmos-transfer1/LICENSE DELETED
@@ -1,201 +0,0 @@
1
- Apache License
2
- Version 2.0, January 2004
3
- http://www.apache.org/licenses/
4
-
5
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
-
7
- 1. Definitions.
8
-
9
- "License" shall mean the terms and conditions for use, reproduction,
10
- and distribution as defined by Sections 1 through 9 of this document.
11
-
12
- "Licensor" shall mean the copyright owner or entity authorized by
13
- the copyright owner that is granting the License.
14
-
15
- "Legal Entity" shall mean the union of the acting entity and all
16
- other entities that control, are controlled by, or are under common
17
- control with that entity. For the purposes of this definition,
18
- "control" means (i) the power, direct or indirect, to cause the
19
- direction or management of such entity, whether by contract or
20
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
- outstanding shares, or (iii) beneficial ownership of such entity.
22
-
23
- "You" (or "Your") shall mean an individual or Legal Entity
24
- exercising permissions granted by this License.
25
-
26
- "Source" form shall mean the preferred form for making modifications,
27
- including but not limited to software source code, documentation
28
- source, and configuration files.
29
-
30
- "Object" form shall mean any form resulting from mechanical
31
- transformation or translation of a Source form, including but
32
- not limited to compiled object code, generated documentation,
33
- and conversions to other media types.
34
-
35
- "Work" shall mean the work of authorship, whether in Source or
36
- Object form, made available under the License, as indicated by a
37
- copyright notice that is included in or attached to the work
38
- (an example is provided in the Appendix below).
39
-
40
- "Derivative Works" shall mean any work, whether in Source or Object
41
- form, that is based on (or derived from) the Work and for which the
42
- editorial revisions, annotations, elaborations, or other modifications
43
- represent, as a whole, an original work of authorship. For the purposes
44
- of this License, Derivative Works shall not include works that remain
45
- separable from, or merely link (or bind by name) to the interfaces of,
46
- the Work and Derivative Works thereof.
47
-
48
- "Contribution" shall mean any work of authorship, including
49
- the original version of the Work and any modifications or additions
50
- to that Work or Derivative Works thereof, that is intentionally
51
- submitted to Licensor for inclusion in the Work by the copyright owner
52
- or by an individual or Legal Entity authorized to submit on behalf of
53
- the copyright owner. For the purposes of this definition, "submitted"
54
- means any form of electronic, verbal, or written communication sent
55
- to the Licensor or its representatives, including but not limited to
56
- communication on electronic mailing lists, source code control systems,
57
- and issue tracking systems that are managed by, or on behalf of, the
58
- Licensor for the purpose of discussing and improving the Work, but
59
- excluding communication that is conspicuously marked or otherwise
60
- designated in writing by the copyright owner as "Not a Contribution."
61
-
62
- "Contributor" shall mean Licensor and any individual or Legal Entity
63
- on behalf of whom a Contribution has been received by Licensor and
64
- subsequently incorporated within the Work.
65
-
66
- 2. Grant of Copyright License. Subject to the terms and conditions of
67
- this License, each Contributor hereby grants to You a perpetual,
68
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
- copyright license to reproduce, prepare Derivative Works of,
70
- publicly display, publicly perform, sublicense, and distribute the
71
- Work and such Derivative Works in Source or Object form.
72
-
73
- 3. Grant of Patent License. Subject to the terms and conditions of
74
- this License, each Contributor hereby grants to You a perpetual,
75
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
- (except as stated in this section) patent license to make, have made,
77
- use, offer to sell, sell, import, and otherwise transfer the Work,
78
- where such license applies only to those patent claims licensable
79
- by such Contributor that are necessarily infringed by their
80
- Contribution(s) alone or by combination of their Contribution(s)
81
- with the Work to which such Contribution(s) was submitted. If You
82
- institute patent litigation against any entity (including a
83
- cross-claim or counterclaim in a lawsuit) alleging that the Work
84
- or a Contribution incorporated within the Work constitutes direct
85
- or contributory patent infringement, then any patent licenses
86
- granted to You under this License for that Work shall terminate
87
- as of the date such litigation is filed.
88
-
89
- 4. Redistribution. You may reproduce and distribute copies of the
90
- Work or Derivative Works thereof in any medium, with or without
91
- modifications, and in Source or Object form, provided that You
92
- meet the following conditions:
93
-
94
- (a) You must give any other recipients of the Work or
95
- Derivative Works a copy of this License; and
96
-
97
- (b) You must cause any modified files to carry prominent notices
98
- stating that You changed the files; and
99
-
100
- (c) You must retain, in the Source form of any Derivative Works
101
- that You distribute, all copyright, patent, trademark, and
102
- attribution notices from the Source form of the Work,
103
- excluding those notices that do not pertain to any part of
104
- the Derivative Works; and
105
-
106
- (d) If the Work includes a "NOTICE" text file as part of its
107
- distribution, then any Derivative Works that You distribute must
108
- include a readable copy of the attribution notices contained
109
- within such NOTICE file, excluding those notices that do not
110
- pertain to any part of the Derivative Works, in at least one
111
- of the following places: within a NOTICE text file distributed
112
- as part of the Derivative Works; within the Source form or
113
- documentation, if provided along with the Derivative Works; or,
114
- within a display generated by the Derivative Works, if and
115
- wherever such third-party notices normally appear. The contents
116
- of the NOTICE file are for informational purposes only and
117
- do not modify the License. You may add Your own attribution
118
- notices within Derivative Works that You distribute, alongside
119
- or as an addendum to the NOTICE text from the Work, provided
120
- that such additional attribution notices cannot be construed
121
- as modifying the License.
122
-
123
- You may add Your own copyright statement to Your modifications and
124
- may provide additional or different license terms and conditions
125
- for use, reproduction, or distribution of Your modifications, or
126
- for any such Derivative Works as a whole, provided Your use,
127
- reproduction, and distribution of the Work otherwise complies with
128
- the conditions stated in this License.
129
-
130
- 5. Submission of Contributions. Unless You explicitly state otherwise,
131
- any Contribution intentionally submitted for inclusion in the Work
132
- by You to the Licensor shall be under the terms and conditions of
133
- this License, without any additional terms or conditions.
134
- Notwithstanding the above, nothing herein shall supersede or modify
135
- the terms of any separate license agreement you may have executed
136
- with Licensor regarding such Contributions.
137
-
138
- 6. Trademarks. This License does not grant permission to use the trade
139
- names, trademarks, service marks, or product names of the Licensor,
140
- except as required for reasonable and customary use in describing the
141
- origin of the Work and reproducing the content of the NOTICE file.
142
-
143
- 7. Disclaimer of Warranty. Unless required by applicable law or
144
- agreed to in writing, Licensor provides the Work (and each
145
- Contributor provides its Contributions) on an "AS IS" BASIS,
146
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
- implied, including, without limitation, any warranties or conditions
148
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
- PARTICULAR PURPOSE. You are solely responsible for determining the
150
- appropriateness of using or redistributing the Work and assume any
151
- risks associated with Your exercise of permissions under this License.
152
-
153
- 8. Limitation of Liability. In no event and under no legal theory,
154
- whether in tort (including negligence), contract, or otherwise,
155
- unless required by applicable law (such as deliberate and grossly
156
- negligent acts) or agreed to in writing, shall any Contributor be
157
- liable to You for damages, including any direct, indirect, special,
158
- incidental, or consequential damages of any character arising as a
159
- result of this License or out of the use or inability to use the
160
- Work (including but not limited to damages for loss of goodwill,
161
- work stoppage, computer failure or malfunction, or any and all
162
- other commercial damages or losses), even if such Contributor
163
- has been advised of the possibility of such damages.
164
-
165
- 9. Accepting Warranty or Additional Liability. While redistributing
166
- the Work or Derivative Works thereof, You may choose to offer,
167
- and charge a fee for, acceptance of support, warranty, indemnity,
168
- or other liability obligations and/or rights consistent with this
169
- License. However, in accepting such obligations, You may act only
170
- on Your own behalf and on Your sole responsibility, not on behalf
171
- of any other Contributor, and only if You agree to indemnify,
172
- defend, and hold each Contributor harmless for any liability
173
- incurred by, or claims asserted against, such Contributor by reason
174
- of your accepting any such warranty or additional liability.
175
-
176
- END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright [yyyy] [name of copyright owner]
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
 
cosmos-transfer1/README.md DELETED
@@ -1,102 +0,0 @@
1
- <p align="center">
2
- <img src="assets/nvidia-cosmos-header.png" alt="NVIDIA Cosmos Header">
3
- </p>
4
-
5
- ### [Product Website](https://www.nvidia.com/en-us/ai/cosmos/) | [Hugging Face](https://huggingface.co/collections/nvidia/cosmos-transfer1-67c9d328196453be6e568d3e) | [Paper](https://arxiv.org/abs/2503.14492) | [Paper Website](https://research.nvidia.com/labs/dir/cosmos-transfer1/)
6
-
7
- Cosmos-Transfer1 is a key branch of Cosmos World Foundation Models (WFMs) specialized for multimodal controllable conditional world generation or world2world transfer. The three main branches of Cosmos WFMs are [cosmos-predict](https://github.com/nvidia-cosmos/cosmos-predict1), [cosmos-transfer](https://github.com/nvidia-cosmos/cosmos-transfer1), and [cosmos-reason](https://github.com/nvidia-cosmos/cosmos-reason1). We visualize the architecture of Cosmos-Transfer1 in the following figure.
8
-
9
- <p align="center">
10
- <img src="assets/transfer1_diagram.png" alt="Cosmos-Transfer1 Architecture Diagram">
11
- </p>
12
-
13
-
14
- Cosmos-Transfer1 includes the following:
15
- - **ControlNet-based single modality conditional world generation** where a user can generate visual simulation based on one of the following modalities: segmentation video, depth video, edge video, blur video, LiDAR video, or HDMap video. Cosmos-Transfer1 generates a video based on the single-modality conditional input, a user text prompt, and, optionally, an input RGB video frame prompt (which could be from the last video generation result when operating in the autoregressive setting). We will use Cosmos-Transfer1-7B [Modality] to refer to the model operating in this setting. For example, Cosmos-Transfer1-7B [Depth] refers to a depth ControlNet model.
16
- - **MultiControlNet-based multimodal conditional world generation** where a user can generate visual simulation based on any combination of segmentation video, depth video, edge video, and blur video (LiDAR video and HDMap video in the AV sample) with a spatiotemporal control map to control the strength of each modality across space and time. Cosmos-Transfer1 generates a video based on the multimodal conditional inputs, a user text prompt, and, optionally, an input RGB video frame prompt (which could be from the last video generation result when operating in the autoregressive setting). This is the preferred mode of Cosmos-Transfer1. We will refer to it as Cosmos-Transfer1-7B.
17
- - **4KUpscaler** for upscaling a 720p-resolution video to a 4K-resolution video.
18
- - **Post-training scripts** for helping Physical AI builders post-train pre-trained Cosmos-Transfer1 for their applications.
19
- - **Pre-training scripts** for helping Physical AI builders train their own Cosmos-Transfer1 models from scratch.
20
-
21
- ## News
22
- - [2025/05] **Cosmos AV Single2MultiView** is available! Now you can create dynamic, multi-view clips from just one video. Try it out and tell us what you think!
23
- - [Inference guide](examples/inference_cosmos_transfer1_7b_sample_av_single2multiview.md)
24
- - [Build your own or PyTorch post-training](examples/training_cosmos_transfer_7B_sample_AV.md)
25
-
26
- - [Hugging Face model](https://huggingface.co/nvidia/Cosmos-Transfer1-7B-Sample-AV-Single2MultiView)
27
-
28
- - [2025/04] [Post training](README.md#post-train-pre-trained-cosmos-transfer1-models) is available! Now you can customize Transfer1 models in your own way. Please try it out; we look forward to your feedback.
29
-
30
- ## Example Model Behavior
31
-
32
- [Cosmos-Transfer LiDAR + HDMap Conditional Inputs -> World](https://github.com/nvidia-cosmos/cosmos-transfer1)
33
-
34
- <video src="https://github.com/user-attachments/assets/169cf5c5-de59-44db-b1bf-19fb57cb7e2e">
35
- Your browser does not support the video tag.
36
- </video>
37
-
38
- [Cosmos-Transfer Multimodal Conditional Inputs -> World](https://github.com/nvidia-cosmos/cosmos-transfer1)
39
-
40
- <video src="https://github.com/user-attachments/assets/4c1da01f-c3fd-4b6c-b084-f5ef653abb80">
41
- Your browser does not support the video tag.
42
- </video>
43
-
44
- ## Getting Started
45
-
46
- We provide a comprehensive set of examples to illustrate how to perform inference, post-training, etc., with Cosmos-Transfer1. Click a relevant example below and start your Cosmos journey.
47
-
48
- ### Installation
49
-
50
- Please refer to [INSTALL.md](INSTALL.md) for general instructions on environment setup.
51
-
52
- ### Inference with pre-trained Cosmos-Transfer1 models
53
-
54
- * [Inference with pre-trained Cosmos-Transfer1-7B](/examples/inference_cosmos_transfer1_7b.md) **[with multi-GPU support]**
55
- * [Inference with pre-trained Cosmos-Transfer1-7B-Sample-AV](/examples/inference_cosmos_transfer1_7b_sample_av.md) **[with multi-GPU support]**
56
- * [Inference with pre-trained Cosmos-Transfer1-7B-4KUpscaler](/examples/inference_cosmos_transfer1_7b_4kupscaler.md) **[with multi-GPU support]**
57
- * [Inference with pre-trained Cosmos-Transfer1-7B (Depth)](examples/inference_cosmos_transfer1_7b_depth.md)
58
- * [Inference with pre-trained Cosmos-Transfer1-7B (Segmentation)](examples/inference_cosmos_transfer1_7b_seg.md)
59
- * [Inference with pre-trained Cosmos-Transfer1-7B (Edge)](examples/inference_cosmos_transfer1_7b.md#example-1-single-control-edge)
60
- * [Inference with pre-trained Cosmos-Transfer1-7B (Vis)](examples/inference_cosmos_transfer1_7b_vis.md)
61
- * [Inference with pre-trained Cosmos-Transfer1pt1-7B [Keypoint]](/examples/inference_cosmos_transfer1pt1_7b_keypoint.md)
62
- * [Inference with pre-trained Cosmos-Transfer1-7B-Sample-AV-Multiview](/examples/inference_cosmos_transfer1_7b_sample_av_single2multiview.md)
63
-
64
- ### Post-train pre-trained Cosmos-Transfer1 models
65
-
66
- * [Post-train pre-trained Cosmos-Transfer1-7B [Depth | Edge | Keypoint | Segmentation | Vis]](examples/training_cosmos_transfer_7b.md) **[with multi-GPU support]**
67
- * [Post-train pre-trained Cosmos-Transfer1-7B-Sample-AV [LiDAR|HDMap]](examples/training_cosmos_transfer_7B_sample_AV.md) **[with multi-GPU support]**
68
- * [Post-train pre-trained Cosmos-Transfer1-7B-Sample-AV-Multiview[LiDAR|HDMap]](examples/training_cosmos_transfer_7B_sample_AV.md) **[with multi-GPU support]**
69
-
70
- ### Build your own Cosmos-Transfer1 models from scratch
71
-
72
- * [Pre-train Cosmos-Transfer1-7B [Depth | Edge | Keypoint | Segmentation | Vis]](examples/training_cosmos_transfer_7b.md) **[with multi-GPU support]**
73
- * [Pre-train Cosmos-Transfer1-7B-Sample-AV [LiDAR|HDMap]](examples/training_cosmos_transfer_7B_sample_AV.md) **[with multi-GPU support]**
74
- * [Pre-train Cosmos-Transfer1-7B-Sample-AV-Multiview[LiDAR|HDMap]](examples/training_cosmos_transfer_7B_sample_AV.md) **[with multi-GPU support]**
75
-
76
- ### Workflow
77
-
78
- * [Robotics Augmentation Workflow](/cosmos_transfer1/auxiliary/robot_augmentation/README.md): Scene augmentation for robotic manipulation, mapping one synthetic robotics example to multiple realistic examples
79
-
80
-
81
- <video src="https://github.com/user-attachments/assets/6dee15f5-9d8b-469a-a92a-3419cb466d44">
82
- Your browser does not support the video tag.
83
- </video>
84
-
85
- ## Cosmos-Transfer1 Models
86
-
87
- * [Cosmos-Transfer1-7B](https://huggingface.co/nvidia/Cosmos-Transfer1-7B): multimodal controllable conditional world generation with adaptive spatiotemporal control map. The supported modalities include segmentation, depth, canny edge, and blur visual.
88
- * [Cosmos-Transfer1-7B [Depth | Edge | Keypoint | Segmentation | Vis]](https://huggingface.co/nvidia/Cosmos-Transfer1-7B): single modality controllable conditional world generation. This is Cosmos-Transfer1-7B operating in the single-modality case, where it reduces to a ControlNet.
89
- * [Cosmos-Transfer1-7B-Sample-AV](https://huggingface.co/nvidia/Cosmos-Transfer1-7B-Sample-AV): multimodal controllable conditional world generation with adaptive spatiotemporal control map specialized for autonomous vehicle applications. The supported modalities include LiDAR and HDMap.
90
- * [Cosmos-Transfer1-7B [LiDAR | HDMap]](https://huggingface.co/nvidia/Cosmos-Transfer1-7B-Sample-AV): single modality controllable conditional world generation for autonomous vehicle applications. This is Cosmos-Transfer1-7B-Sample-AV operating in the single-modality case, where it reduces to a ControlNet.
91
- * [Cosmos-Transfer1-7B-4KUpscaler](https://huggingface.co/nvidia/Cosmos-Transfer1-7B-4KUpscaler): a 4K upscaler that super-resolves 720p videos to 4K.
92
-
93
-
94
- ## License and Contact
95
-
96
- This project will download and install additional third-party open source software projects. Review the license terms of these open source projects before use.
97
-
98
- This model includes safety and content moderation features powered by Llama Guard 3. Llama Guard 3 is used solely as a content input filter and is subject to its own license.
99
-
100
- NVIDIA Cosmos source code is released under the [Apache 2 License](https://www.apache.org/licenses/LICENSE-2.0).
101
-
102
- NVIDIA Cosmos models are released under the [NVIDIA Open Model License](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license). For a custom license, please contact [[email protected]](mailto:[email protected]).
 
cosmos-transfer1/checkpoints/README.md DELETED
@@ -1,3 +0,0 @@
1
- # Checkpoint directory
2
-
3
- Follow our instructions for downloading checkpoints in [Cosmos Diffusion Inference](../cosmos_transfer1/diffusion/README.md#download-checkpoints). Cosmos checkpoints will be downloaded to this directory.
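- 
- One way to fetch a model into this directory is with the Hugging Face CLI. This is a sketch: the repo id is one of the models listed in the main README, but the local layout shown here is an assumption and the official download instructions may differ.
- 
- ```bash
- # Download the Cosmos-Transfer1-7B weights into checkpoints/ (illustrative layout)
- huggingface-cli download nvidia/Cosmos-Transfer1-7B --local-dir checkpoints/nvidia/Cosmos-Transfer1-7B
- ```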
 
cosmos-transfer1/cosmos-transfer1.yaml DELETED
@@ -1,30 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- name: cosmos-transfer1
17
- channels:
18
- - conda-forge
19
- dependencies:
20
- - python=3.12
21
- - pip=25.0
22
- - cmake
23
- - ninja
24
- - libgl
25
- - ffmpeg
26
- - gcc=12.4.0
27
- - gxx=12.4.0
28
- - cuda=12.4
29
- - cuda-nvcc=12.4
30
- - cuda-toolkit=12.4
 
cosmos-transfer1/cosmos_transfer1/auxiliary/depth_anything/inference/__init__.py DELETED
File without changes
cosmos-transfer1/cosmos_transfer1/auxiliary/depth_anything/inference/depth_anything_pipeline.py DELETED
@@ -1,55 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import argparse
17
-
18
- from PIL import Image
19
-
20
- from cosmos_transfer1.auxiliary.depth_anything.model.depth_anything import DepthAnythingModel
21
-
22
-
23
- def parse_args():
24
- parser = argparse.ArgumentParser(description="Depth Estimation using Depth Anything V2")
25
- parser.add_argument("--input", type=str, required=True, help="Path to input image or video file")
26
- parser.add_argument("--output", type=str, required=True, help="Path to save the output image or video")
27
- parser.add_argument(
28
- "--mode",
29
- type=str,
30
- choices=["image", "video"],
31
- default="image",
32
- help="Processing mode: 'image' for a single image, 'video' for a video file",
33
- )
34
- return parser.parse_args()
35
-
36
-
37
- def main():
38
- args = parse_args()
39
- model = DepthAnythingModel()
40
-
41
- if args.mode == "image":
42
- # Load the input image and predict its depth
43
- image = Image.open(args.input).convert("RGB")
44
- depth_image = model.predict_depth(image)
45
- depth_image.save(args.output)
46
- print(f"Depth image saved to {args.output}")
47
- elif args.mode == "video":
48
- # Process the video and save the output
49
- out_path = model(args.input, args.output)  # DepthAnythingModel.__call__ handles video input
50
- if out_path:
51
- print(f"Depth video saved to {out_path}")
52
-
53
-
54
- if __name__ == "__main__":
55
- main()
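- 
- 
- # Example CLI usage (a sketch; the input/output paths are placeholders):
- #
- #   PYTHONPATH=$(pwd) python cosmos_transfer1/auxiliary/depth_anything/inference/depth_anything_pipeline.py \
- #       --input input.jpg --output outputs/depth.png --mode image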
 
cosmos-transfer1/cosmos_transfer1/auxiliary/depth_anything/model/__init__.py DELETED
File without changes
cosmos-transfer1/cosmos_transfer1/auxiliary/depth_anything/model/depth_anything.py DELETED
@@ -1,151 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import os
17
-
18
- import cv2
19
- import imageio
20
- import numpy as np
21
- import torch
22
- from PIL import Image
23
- from transformers import AutoImageProcessor, AutoModelForDepthEstimation
24
-
25
- from cosmos_transfer1.checkpoints import DEPTH_ANYTHING_MODEL_CHECKPOINT
26
- from cosmos_transfer1.utils import log
27
-
28
-
29
- class DepthAnythingModel:
30
- def __init__(self):
31
- """
32
- Initialize the Depth Anything model and its image processor.
33
- """
34
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
35
- # Load image processor and model with half precision
36
- print(f"Loading Depth Anything model - {DEPTH_ANYTHING_MODEL_CHECKPOINT}...")
37
- self.image_processor = AutoImageProcessor.from_pretrained(
38
- DEPTH_ANYTHING_MODEL_CHECKPOINT,
39
- torch_dtype=torch.float16,
40
- trust_remote_code=True,
41
- )
42
- self.model = AutoModelForDepthEstimation.from_pretrained(
43
- DEPTH_ANYTHING_MODEL_CHECKPOINT,
44
- torch_dtype=torch.float16,
45
- trust_remote_code=True,
46
- ).to(self.device)
47
-
48
- def predict_depth(self, image: Image.Image) -> Image.Image:
49
- """
50
- Process a single PIL image and return a depth map as a uint16 PIL Image.
51
- """
52
- # Prepare inputs for the model
53
- inputs = self.image_processor(images=image, return_tensors="pt")
54
- # Move all tensors to the proper device with half precision
55
- inputs = {k: v.to(self.device, dtype=torch.float16) for k, v in inputs.items()}
56
-
57
- with torch.no_grad():
58
- outputs = self.model(**inputs)
59
- predicted_depth = outputs.predicted_depth
60
-
61
- # Interpolate the predicted depth to the original image size
62
- prediction = torch.nn.functional.interpolate(
63
- predicted_depth.unsqueeze(1),
64
- size=image.size[::-1], # PIL image size is (width, height), interpolate expects (height, width)
65
- mode="bicubic",
66
- align_corners=False,
67
- )
68
-
69
- # Convert the output tensor to a numpy array and save as a depth image
70
- output = prediction.squeeze().cpu().numpy()
71
- depth_image = DepthAnythingModel.save_depth(output)
72
- return depth_image
73
-
74
- def __call__(self, input_video: str, output_video: str = "depth.mp4") -> str:
75
- """
76
- Process a video file frame-by-frame to produce a depth-estimated video.
77
- The output video is saved as an MP4 file.
78
- """
79
-
80
- log.info(f"Processing video: {input_video} to generate depth video: {output_video}")
81
- assert os.path.exists(input_video)
82
-
83
- cap = cv2.VideoCapture(input_video)
84
- if not cap.isOpened():
85
- print("Error: Cannot open video file.")
86
- return
87
-
88
- # Retrieve video properties
89
- frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
90
- frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
91
- fps = cap.get(cv2.CAP_PROP_FPS)
92
-
93
- depths = []
94
- while True:
95
- ret, frame = cap.read()
96
- if not ret:
97
- break
98
-
99
- # Convert frame from BGR to RGB and then to PIL Image
100
- image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
101
- inputs = self.image_processor(images=image, return_tensors="pt")
102
- inputs = {k: v.to(self.device, dtype=torch.float16) for k, v in inputs.items()}
103
-
104
- with torch.no_grad():
105
- outputs = self.model(**inputs)
106
- predicted_depth = outputs.predicted_depth
107
-
108
- # For video processing, take the first output and interpolate to original size
109
- prediction = torch.nn.functional.interpolate(
110
- predicted_depth[0].unsqueeze(0).unsqueeze(0),
111
- size=(frame_height, frame_width),
112
- mode="bicubic",
113
- align_corners=False,
114
- )
115
- depth = prediction.squeeze().cpu().numpy()
116
- depths += [depth]
117
- cap.release()
118
-
119
- depths = np.stack(depths)
120
- depths_normed = (depths - depths.min()) / (depths.max() - depths.min() + 1e-8) * 255.0
121
- depths_normed = depths_normed.astype(np.uint8)
122
-
123
- os.makedirs(os.path.dirname(output_video), exist_ok=True)
124
- self.write_video(depths_normed, output_video, fps=fps)
125
- return output_video
126
-
127
- @staticmethod
128
- def save_depth(output: np.ndarray) -> Image.Image:
129
- """
130
- Convert the raw depth output (float values) into a uint16 PIL Image.
131
- """
132
- depth_min = output.min()
133
- depth_max = output.max()
134
- max_val = (2**16) - 1 # Maximum value for uint16
135
-
136
- if depth_max - depth_min > np.finfo("float").eps:
137
- out_array = max_val * (output - depth_min) / (depth_max - depth_min)
138
- else:
139
- out_array = np.zeros_like(output)
140
-
141
- formatted = out_array.astype("uint16")
142
- depth_image = Image.fromarray(formatted, mode="I;16")
143
- return depth_image
144
-
145
- @staticmethod
146
- def write_video(frames, output_path, fps=30):
147
- with imageio.get_writer(output_path, fps=fps, macro_block_size=8) as writer:
148
- for frame in frames:
149
- if len(frame.shape) == 2: # single channel
150
- frame = frame[:, :, None].repeat(3, axis=2)
151
- writer.append_data(frame)
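- 
- 
- # Minimal usage sketch (paths are placeholders):
- #
- #   model = DepthAnythingModel()
- #   model("input.mp4", "outputs/depth.mp4")  # video -> normalized depth video via __call__
- #   depth = model.predict_depth(Image.open("frame.png").convert("RGB"))  # image -> uint16 depth map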
 
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/README.md DELETED
@@ -1,17 +0,0 @@
1
- # Cosmos Guardrail
2
-
3
- This page outlines a set of tools to ensure content safety in Cosmos. For implementation details, please consult the [Cosmos paper](https://research.nvidia.com/publication/2025-01_cosmos-world-foundation-model-platform-physical-ai).
4
-
5
- ## Overview
6
-
7
- Our guardrail system consists of two stages: pre-Guard and post-Guard.
8
-
9
- Cosmos pre-Guard models are applied to text input, including input prompts and upsampled prompts.
10
-
11
- * Blocklist: a keyword list checker for detecting harmful keywords
12
- * Llama Guard 3: an LLM-based approach for blocking harmful prompts
13
-
14
- Cosmos post-Guard models are applied to video frames generated by Cosmos models.
15
-
16
- * Video Content Safety Filter: a classifier trained to distinguish between safe and unsafe video frames
17
- * Face Blur Filter: a face detection and blurring module
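- 
- A minimal sketch of how a pre-Guard text check is composed (the `Aegis` and `GuardrailRunner` classes are defined in the modules below; the checkpoint path is a placeholder):
- 
- ```python
- from cosmos_transfer1.auxiliary.guardrail.aegis.aegis import Aegis
- from cosmos_transfer1.auxiliary.guardrail.common.core import GuardrailRunner
- 
- # Compose one or more safety models into a runner, then check a text prompt.
- aegis = Aegis(checkpoint_dir="checkpoints")
- runner = GuardrailRunner(safety_models=[aegis])
- is_safe, message = runner.run_safety_check("A robot arm stacks boxes in a warehouse")
- ```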
 
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/__init__.py DELETED
@@ -1,14 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
 
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/aegis/__init__.py DELETED
@@ -1,14 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
 
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/aegis/aegis.py DELETED
@@ -1,135 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import argparse
17
- import os
18
-
19
- import torch
20
- from peft import PeftModel
21
- from transformers import AutoModelForCausalLM, AutoTokenizer
22
-
23
- from cosmos_transfer1.auxiliary.guardrail.aegis.categories import UNSAFE_CATEGORIES
24
- from cosmos_transfer1.auxiliary.guardrail.common.core import ContentSafetyGuardrail, GuardrailRunner
25
- from cosmos_transfer1.checkpoints import AEGIS_MODEL_CHECKPOINT, LLAMA_GUARD_MODEL_CHECKPOINT
26
- from cosmos_transfer1.utils import log, misc
27
-
28
- SAFE = misc.Color.green("SAFE")
29
- UNSAFE = misc.Color.red("UNSAFE")
30
-
31
-
32
- class Aegis(ContentSafetyGuardrail):
33
- def __init__(
34
- self,
35
- checkpoint_dir: str,
36
- device="cuda" if torch.cuda.is_available() else "cpu",
37
- ) -> None:
38
- self.checkpoint_dir = checkpoint_dir
39
- self.device = device
40
- self.dtype = torch.bfloat16
41
- base_model_id = "meta-llama/LlamaGuard-7b"
42
- aegis_adapter = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0"
43
-
44
- base_model_dir = os.path.join(self.checkpoint_dir, base_model_id)
45
- aegis_adapter_dir = os.path.join(self.checkpoint_dir, aegis_adapter)
46
-
47
- base_model = AutoModelForCausalLM.from_pretrained(base_model_id, cache_dir=base_model_dir)
48
- self.tokenizer = AutoTokenizer.from_pretrained(base_model_id, cache_dir=base_model_dir)
49
- self.model = PeftModel.from_pretrained(base_model, aegis_adapter, cache_dir=aegis_adapter_dir)
50
-
51
- self.model.to(self.device, dtype=self.dtype).eval()
52
-
53
- def get_moderation_prompt(self, user_prompt: str) -> str:
54
- """Create the moderation prompt for the Aegis model."""
55
- full_prompt = f"""[INST] Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.
56
-
57
- <BEGIN UNSAFE CONTENT CATEGORIES>
58
- {"".join([f"{category['instruction']}" for category in UNSAFE_CATEGORIES])}
59
- <END UNSAFE CONTENT CATEGORIES>
60
-
61
- <BEGIN CONVERSATION>
62
-
63
- """
64
- full_prompt += f"User: {user_prompt}\n\n"
65
- full_prompt += """<END CONVERSATION>
66
-
67
- Provide your safety assessment for User in the above conversation:
68
- - First line must read 'safe' or 'unsafe'.
69
- - If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""
70
-
71
- return full_prompt
72
-
73
- def get_aegis_block_message(self, moderation_output: str) -> str:
74
- """Extract the blocked category and reason from the Aegis model output."""
75
- block_msg = "Prompt blocked by Aegis."
76
- try:
77
- # Extract blocked category and reason
78
- if len(moderation_output.split("\n")) >= 2:
79
- moderation_output_list = moderation_output.split("\n")
80
- blocked_category = int(moderation_output_list[1].lstrip("O")) - 1
81
- if 0 <= blocked_category < len(UNSAFE_CATEGORIES):
82
- blocked_reason = UNSAFE_CATEGORIES[blocked_category]["blocked_reason"]
83
- blocked_category_name = UNSAFE_CATEGORIES[blocked_category]["category"]
84
- block_msg = f"{blocked_category_name}: {blocked_reason}"
85
- except Exception as e:
86
- log.warning(f"Unable to extract blocked category and reason from Aegis output: {e}")
87
- return block_msg
88
-
89
- def filter_aegis_output(self, prompt: str) -> tuple[bool, str]:
90
- """Filter the Aegis model output and return the safety status and message."""
91
- full_prompt = self.get_moderation_prompt(prompt)
92
- inputs = self.tokenizer([full_prompt], add_special_tokens=False, return_tensors="pt").to(self.device)
93
- output = self.model.generate(**inputs, max_new_tokens=100, pad_token_id=self.tokenizer.eos_token_id)
94
- prompt_len = inputs["input_ids"].shape[-1]
95
- moderation_output = self.tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
96
-
97
- if "unsafe" in moderation_output.lower():
98
- block_msg = self.get_aegis_block_message(moderation_output)
99
- return False, block_msg
100
- else:
101
- return True, ""
102
-
103
- def is_safe(self, prompt: str) -> tuple[bool, str]:
104
- """Check if the input prompt is safe according to the Aegis model."""
105
- try:
106
- return self.filter_aegis_output(prompt)
107
- except Exception as e:
108
- log.error(f"Unexpected error occurred when running Aegis guardrail: {e}")
109
- return True, "Unexpected error occurred when running Aegis guardrail."
110
-
111
-
112
- def parse_args():
113
- parser = argparse.ArgumentParser()
114
- parser.add_argument("--prompt", type=str, required=True, help="Input prompt")
115
- parser.add_argument(
116
- "--checkpoint_dir",
117
- type=str,
118
- help="Path to the Aegis checkpoint folder",
119
- default="checkpoints",
120
- )
121
- return parser.parse_args()
122
-
123
-
124
- def main(args):
125
- aegis = Aegis(checkpoint_dir=args.checkpoint_dir)
126
- runner = GuardrailRunner(safety_models=[aegis])
127
- with misc.timer("aegis safety check"):
128
- safety, message = runner.run_safety_check(args.prompt)
129
- log.info(f"Input is: {'SAFE' if safety else 'UNSAFE'}")
130
- log.info(f"Message: {message}") if not safety else None
131
-
132
-
133
- if __name__ == "__main__":
134
- args = parse_args()
135
- main(args)
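- 
- 
- # Example CLI usage (a sketch; assumes the Llama Guard base model and Aegis adapter
- # are already available under checkpoints/):
- #
- #   PYTHONPATH=$(pwd) python cosmos_transfer1/auxiliary/guardrail/aegis/aegis.py \
- #       --prompt "A robot arm stacks boxes in a warehouse" --checkpoint_dir checkpoints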
 
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/aegis/categories.py DELETED
@@ -1,192 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- VIOLENCE = {
17
- "category": "violence",
18
- "blocked_reason": """This prompt is blocked due to the presence of violent content.""",
19
- "instruction": """O1: Violence.
20
- Should not
21
- - Help people plan or engage in violence.
22
- - Encourage people to engage in violence.
23
- - Contain unpleasant vivid violent imagery or violent description that arouses discomfort in the reader.
24
- Can
25
- - Provide information on violence.
26
- - Discuss the topics of violence, and can discuss historical events involving violence.
27
- """,
28
- }
29
-
30
- SEXUAL = {
31
- "category": "sexual",
32
- "blocked_reason": """This prompt is blocked due to the presence of sexual content.""",
33
- "instruction": """O2: Sexual.
34
- Should not
35
- - Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.
36
- - Engage or encourage sexual activity that is considered illegal.
37
- Can
38
- - Discuss topics related to sexuality and sexual education.
39
- - Provide general information regarding sexual activities.
40
- - Provide guidance on topics related to sexual health.
41
- """,
42
- }
43
-
44
- CRIMINAL_PLANNING = {
45
- "category": "criminal_planning",
46
- "blocked_reason": """This prompt is blocked due to the presence of criminal planning/confessions content.""",
47
- "instruction": """O3: Criminal Planning/Confessions.
48
- Should not
49
- - Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.
50
- - Encourage people to engage in criminal activities or condone criminal behavior.
51
- - Have a confession of crime that was committed or will be committed in future.
52
- Can
53
- - Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.
54
- - Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.
55
- """,
56
- }
57
-
58
- WEAPONS = {
59
- "category": "weapons",
60
- "blocked_reason": """This prompt is blocked due to the presence of weapons content.""",
61
- "instruction": """O4: Guns and Illegal Weapons.
62
- Should not
63
- - Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.
64
- - Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.
65
- - Assist or encourage people to execute or own other illegal weapons for example biological agents, or chemical weapons, including instructions on how to create or use them.
66
- Can
67
- - Discuss firearms and the arguments for and against firearm ownership.
68
- """,
69
- }
70
-
71
- SUBSTANCE_ABUSE = {
72
- "category": "substance_abuse",
73
- "blocked_reason": """This prompt is blocked due to the presence of substance abuse content.""",
74
- "instruction": """O5: Controlled/Regulated Substances.
75
- Should not
76
- - Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.
77
- - Assist or encourage people to create such substances when it is illegal to do so.
78
- Can
79
- - Discuss the use of regulated or controlled substances, and can provide general information on those substances (ex: what their chemical composition is and what their effects are).
80
- - Provide information on how those substances are manufactured, provided that the information is not so detailed as to provide step-by-step instructions for their creation.
81
- """,
82
- }
83
-
84
- SUICIDE = {
85
- "category": "suicide",
86
- "blocked_reason": "This prompt is blocked due to the presence of suicide and self harm content.",
87
- "instruction": """O6: Suicide and Self Harm.
88
- Should not
89
- - Encourage people to harm themselves.
90
- - Romanticize or condone self-harm or suicide.
91
- - Provide information regarding the methods of suicide or self-harm.
92
- - Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.
93
- - Contain confessions or intent of self harm
94
- Can
95
- - Provide general information regarding self-harm and suicide (ex: statistics, the mental health challenges faced by those who take these actions).
96
- Should
97
- - Provide information regarding appropriate health resources to people who imply or express the desire to harm themselves.
98
- """,
99
- }
100
-
101
- CSAM = {
102
- "category": "csam",
103
- "blocked_reason": "This prompt contains material related to Child Sexual Abuse",
104
- "instruction": """O7: Sexual (minor).
105
- Should not
106
- - Assist, engage or encourage intention to engage in sexual activity with an underage/minor.
107
- - Discuss or plan sexual acts, sharing of media of sexual nature, personal information or intention to meet in person for sexual activity with a minor.
108
- - Should not include discussion where private information is shared with underage minors for purposes of acting on them and taking actions that are sexual in nature including meeting or sharing media.
109
- - Should not discuss child abuse or engage, encourage, assist in sexual grooming/trafficking of children.
110
- Can
111
- - Provide information about sex education for use in schools.
112
- """,
113
- }
114
-
115
- HATRED = {
116
- "category": "hatred",
117
- "blocked_reason": "This prompt is blocked due to the presence of hate speech content.",
118
- "instruction": """O8: Hate/Identity Hate.
119
- Should not
120
- - Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their
121
- race, color, ethnicity, religion, national origin, sexual orientation, gender, gender identity, or disability or caste.
122
- - Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.
123
- - Should not discuss prejudice towards a group classified as protected according to the law of Germany, UK, USA or any other country.
124
- - Should not contain slurs.
125
- - Should not contain text that is distasteful, offensive, or hateful that causes others to feel grief, anger, or fear.
126
- Can
127
- - Provide information on discrimination.
128
- - Discuss the topics of discrimination, and can discuss historical events involving discrimination.
129
- - Discuss neutral statements referring to group identity and contextualized hate speech (that talks about hate but does not incite hate).
130
- """,
131
- }
132
-
133
- HARASSMENT = {
134
- "category": "harassment",
135
- "blocked_reason": "This prompt is blocked due to the presence of harassment content.",
136
- "instruction": """O10: Harassment.
137
- Should not
138
- - Have content that may be used to torment or annoy individuals in real life, or make harassment more likely to occur.
139
- - Engage in harassment, misconduct and unlawful discrimination
140
- - Single out an individual or marginalization and/or retaliation based on protected characteristics like Race, Color, Gender, Sex,
141
- Sexual orientation, Gender identity, and gender expression, National origin, Ethnicity, Disability (including being regarded as disabled),
142
- Religion, Age (40+), Pregnancy (including pregnancy, childbirth or related medical conditions), Genetic information, Military or veteran status, Citizenship status, Political activity, or affiliation,
143
- - Taking or requesting statutorily protected leave Body characteristics.
144
- - Promise to give a benefit, or a threat to retaliate or take an adverse action based on a response to harassment and misconduct.
145
- - Bullying.
146
- - Being derogatory and humiliating toward an individual but not necessarily protected characteristics under law.
147
- - Making rude or insulting comments, demeaning, and objectifying terms toward an individual.
148
- Can
149
- - Discuss negative impacts of harassment and bullying based on protected characteristics like race, color, gender, sex, nationality, within a contextualized setting.
150
- - Provide information on protected characteristics or refers, cites material that describes what protected characteristics are.
151
- - Provide information or refer or cite material that informs what harassment is.
152
- """,
153
- }
154
-
155
- THREAT = {
156
- "category": "threat",
157
- "blocked_reason": "This prompt is blocked due to the presence of threat content.",
158
- "instruction": """O11: Threat.
159
- Should not
160
- - Discuss a communication of intent to inflict harm or loss on another person.
161
- - Should not contain discussion of intention of intimidation which is a tactic used between conflicting parties to make the other timid or psychologically insecure for coercion or control.
162
- - Should not contain intention of knowingly putting another person in fear of bodily injury.
163
- - Should not assist or plan injury, physical or mental damage, act or instance of injury, or material or any kind of loss to a person.
164
- - Should not contain threats that are forbidden by law as intent to obtain a monetary advantage or to compel a person to act against their will.
165
- Can
166
- - Provide information about threat is and the meaning of threat.
167
- """,
168
- }
169
-
170
- PROFANITY = {
171
- "category": "profanity",
172
- "blocked_reason": "This prompt is blocked due to the presence of profanity content.",
173
- "instruction": """O12: Profanity.
174
- Should not
175
- - Contain swear words, curse words, or other obscene or profane language
176
- - This includes offensive words used without any intention to act on them or without any explicit targeted recipient
177
- """,
178
- }
179
-
180
- UNSAFE_CATEGORIES = [
181
- VIOLENCE,
182
- SEXUAL,
183
- CRIMINAL_PLANNING,
184
- WEAPONS,
185
- SUBSTANCE_ABUSE,
186
- SUICIDE,
187
- CSAM,
188
- HATRED,
189
- HARASSMENT,
190
- THREAT,
191
- PROFANITY,
192
- ]
 
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/blocklist/__init__.py DELETED
@@ -1,14 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/blocklist/blocklist.py DELETED
@@ -1,216 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import argparse
- import os
- import re
- import string
- from difflib import SequenceMatcher
-
- import nltk
- from better_profanity import profanity
-
- from cosmos_transfer1.auxiliary.guardrail.blocklist.utils import read_keyword_list_from_dir, to_ascii
- from cosmos_transfer1.auxiliary.guardrail.common.core import ContentSafetyGuardrail, GuardrailRunner
- from cosmos_transfer1.utils import log, misc
-
- CENSOR = misc.Color.red("*")
-
-
- class Blocklist(ContentSafetyGuardrail):
-     def __init__(
-         self,
-         checkpoint_dir: str,
-         guardrail_partial_match_min_chars: int = 6,
-         guardrail_partial_match_letter_count: float = 0.4,
-     ) -> None:
-         self.checkpoint_dir = os.path.join(checkpoint_dir, "nvidia/Cosmos-Guardrail1/blocklist")
-         nltk.data.path.append(os.path.join(self.checkpoint_dir, "nltk_data"))
-         self.lemmatizer = nltk.WordNetLemmatizer()
-         self.profanity = profanity
-         self.guardrail_partial_match_min_chars = guardrail_partial_match_min_chars
-         self.guardrail_partial_match_letter_count = guardrail_partial_match_letter_count
-
-         # Load blocklist and whitelist keywords
-         self.blocklist_words = read_keyword_list_from_dir(os.path.join(self.checkpoint_dir, "custom"))
-         self.whitelist_words = read_keyword_list_from_dir(os.path.join(self.checkpoint_dir, "whitelist"))
-         self.exact_match_words = read_keyword_list_from_dir(os.path.join(self.checkpoint_dir, "exact_match"))
-
-         self.profanity.load_censor_words(custom_words=self.blocklist_words, whitelist_words=self.whitelist_words)
-         log.debug(f"Loaded {len(self.blocklist_words)} words/phrases from blocklist")
-         log.debug(f"Whitelisted {len(self.whitelist_words)} words/phrases from whitelist")
-         log.debug(f"Loaded {len(self.exact_match_words)} exact match words/phrases from blocklist")
-
-     def uncensor_whitelist(self, input_prompt: str, censored_prompt: str) -> str:
-         """Explicitly uncensor words that are in the whitelist."""
-         input_words = input_prompt.split()
-         censored_words = censored_prompt.split()
-         whitelist_words = set(self.whitelist_words)
-         for i, token in enumerate(input_words):
-             if token.strip(string.punctuation).lower() in whitelist_words:
-                 censored_words[i] = token
-         censored_prompt = " ".join(censored_words)
-         return censored_prompt
-
-     def censor_prompt(self, input_prompt: str) -> tuple[bool, str]:
-         """Censor the prompt using the blocklist with better-profanity fuzzy matching.
-
-         Args:
-             input_prompt: input prompt to censor
-
-         Returns:
-             bool: True if the prompt is blocked, False otherwise
-             str: A message indicating why the prompt was blocked
-         """
-         censored_prompt = self.profanity.censor(input_prompt, censor_char=CENSOR)
-         # Uncensor whitelisted words that were censored by blocklist fuzzy matching
-         censored_prompt = self.uncensor_whitelist(input_prompt, censored_prompt)
-         if CENSOR in censored_prompt:
-             return True, f"Prompt blocked by censorship: Censored Prompt: {censored_prompt}"
-         return False, ""
-
-     @staticmethod
-     def check_partial_match(
-         normalized_prompt: str, normalized_word: str, guardrail_partial_match_letter_count: float
-     ) -> tuple[bool, str]:
-         """
-         Check robustly whether the normalized word and a same-length window of the prompt differ by at most
-         guardrail_partial_match_letter_count characters.
-
-         Args:
-             normalized_prompt: a string with many words
-             normalized_word: a string with one or multiple words; it is shorter than normalized_prompt
-             guardrail_partial_match_letter_count: maximum allowed difference in characters (float to allow partial characters)
-
-         Returns:
-             bool: True if a match is found, False otherwise
-             str: A message indicating why the prompt was blocked
-         """
-         prompt_words = normalized_prompt.split()
-         word_length = len(normalized_word.split())
-         max_similarity_ratio = (len(normalized_word) - float(guardrail_partial_match_letter_count)) / float(
-             len(normalized_word)
-         )
-
-         for i in range(len(prompt_words) - word_length + 1):
-             # Extract a substring from the prompt with the same number of words as the normalized_word
-             substring = " ".join(prompt_words[i : i + word_length])
-             similarity_ratio = SequenceMatcher(None, substring, normalized_word).ratio()
-             if similarity_ratio >= max_similarity_ratio:
-                 return (
-                     True,
-                     f"Prompt blocked by partial match blocklist: Prompt: {normalized_prompt}, Partial Match Word: {normalized_word}",
-                 )
-
-         return False, ""
-
-     @staticmethod
-     def check_against_whole_word_blocklist(
-         prompt: str,
-         blocklist: list[str],
-         guardrail_partial_match_min_chars: int = 6,
-         guardrail_partial_match_letter_count: float = 0.4,
-     ) -> tuple[bool, str]:
-         """
-         Check if the prompt contains any whole words from the blocklist.
-         The match is case insensitive and robust to multiple spaces between words.
-
-         Args:
-             prompt: input prompt to check
-             blocklist: list of words to check against
-             guardrail_partial_match_min_chars: minimum number of characters in a word to check for partial match
-             guardrail_partial_match_letter_count: maximum allowed difference in characters for partial match
-
-         Returns:
-             bool: True if a match is found, False otherwise
-             str: A message indicating why the prompt was blocked
-         """
-         # Normalize spaces and convert to lowercase
-         normalized_prompt = re.sub(r"\s+", " ", prompt).strip().lower()
-
-         for word in blocklist:
-             # Normalize spaces and convert to lowercase for each blocklist word
-             normalized_word = re.sub(r"\s+", " ", word).strip().lower()
-
-             # Use word boundaries to ensure whole word match
-             if re.search(r"\b" + re.escape(normalized_word) + r"\b", normalized_prompt):
-                 return True, f"Prompt blocked by exact match blocklist: Prompt: {prompt}, Exact Match Word: {word}"
-
-             # Check for partial match if the word is long enough
-             if len(normalized_word) >= guardrail_partial_match_min_chars:
-                 match, message = Blocklist.check_partial_match(
-                     normalized_prompt, normalized_word, guardrail_partial_match_letter_count
-                 )
-                 if match:
-                     return True, message
-
-         return False, ""
-
-     def is_safe(self, input_prompt: str = "") -> tuple[bool, str]:
-         """Check if the input prompt is safe using the blocklist."""
-         # Check if the input is empty
-         if not input_prompt:
-             return False, "Input is empty"
-         input_prompt = to_ascii(input_prompt)
-
-         # Check the full sentence for censored words
-         censored, message = self.censor_prompt(input_prompt)
-         if censored:
-             return False, message
-
-         # Check lemmatized words for censored words
-         tokens = nltk.word_tokenize(input_prompt)
-         lemmas = [self.lemmatizer.lemmatize(token) for token in tokens]
-         lemmatized_prompt = " ".join(lemmas)
-         censored, message = self.censor_prompt(lemmatized_prompt)
-         if censored:
-             return False, message
-
-         # Check for exact match blocklist words
-         censored, message = self.check_against_whole_word_blocklist(
-             input_prompt,
-             self.exact_match_words,
-             self.guardrail_partial_match_min_chars,
-             self.guardrail_partial_match_letter_count,
-         )
-         if censored:
-             return False, message
-
-         # If all these checks pass, the input is safe
-         return True, "Input is safe"
-
-
- def parse_args():
-     parser = argparse.ArgumentParser()
-     parser.add_argument("--prompt", type=str, required=True, help="Input prompt")
-     parser.add_argument(
-         "--checkpoint_dir",
-         type=str,
-         help="Path to the Blocklist checkpoint folder",
-     )
-     return parser.parse_args()
-
-
- def main(args):
-     blocklist = Blocklist(checkpoint_dir=args.checkpoint_dir)
-     runner = GuardrailRunner(safety_models=[blocklist])
-     with misc.timer("blocklist safety check"):
-         safety, message = runner.run_safety_check(args.prompt)
-     log.info(f"Input is: {'SAFE' if safety else 'UNSAFE'}")
-     if not safety:
-         log.info(f"Message: {message}")
-
-
- if __name__ == "__main__":
-     args = parse_args()
-     main(args)
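To make the `check_partial_match` threshold concrete: a window of the prompt is blocked when its `SequenceMatcher` ratio is at least `(len(term) - letter_count) / len(term)`. A self-contained sketch with made-up terms (no package imports needed):

```python
from difflib import SequenceMatcher

term = "dangerous object"                  # illustrative blocklist entry
threshold = (len(term) - 0.4) / len(term)  # default budget of 0.4 characters

for candidate in ["dangerous object", "dangerus object"]:
    ratio = SequenceMatcher(None, candidate, term).ratio()
    print(f"{candidate!r}: ratio={ratio:.3f}, blocked={ratio >= threshold}")

# With the default 0.4-character budget, even a single dropped letter falls
# below the threshold, so the partial match is only slightly looser than exact.
```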
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/blocklist/utils.py DELETED
@@ -1,45 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import os
- import re
-
- from cosmos_transfer1.utils import log
-
-
- def read_keyword_list_from_dir(folder_path: str) -> list[str]:
-     """Read keyword list from all files in a folder."""
-     output_list = []
-     file_list = []
-     # Get list of files in the folder
-     for file in os.listdir(folder_path):
-         if os.path.isfile(os.path.join(folder_path, file)):
-             file_list.append(file)
-
-     # Process each file
-     for file in file_list:
-         file_path = os.path.join(folder_path, file)
-         try:
-             with open(file_path, "r") as f:
-                 output_list.extend([line.strip() for line in f.readlines()])
-         except Exception as e:
-             log.error(f"Error reading file {file}: {str(e)}")
-
-     return output_list
-
-
- def to_ascii(prompt: str) -> str:
-     """Convert prompt to ASCII."""
-     return re.sub(r"[^\x00-\x7F]+", " ", prompt)
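Illustrative use of the two helpers; the keyword directory path follows the checkpoint layout assumed elsewhere in this package:

```python
from cosmos_transfer1.auxiliary.guardrail.blocklist.utils import read_keyword_list_from_dir, to_ascii

# Non-ASCII runs are collapsed to single spaces before matching.
print(to_ascii("café prompt ✓"))

# One keyword/phrase per line, aggregated across every file in the folder.
words = read_keyword_list_from_dir("checkpoints/nvidia/Cosmos-Guardrail1/blocklist/custom")
print(f"loaded {len(words)} keywords")
```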
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/common/__init__.py DELETED
File without changes
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/common/core.py DELETED
@@ -1,71 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- from typing import Any, Tuple
-
- import numpy as np
-
- from cosmos_transfer1.utils import log
-
-
- class ContentSafetyGuardrail:
-     def is_safe(self, **kwargs) -> Tuple[bool, str]:
-         raise NotImplementedError("Child classes must implement the is_safe method")
-
-
- class PostprocessingGuardrail:
-     def postprocess(self, frames: np.ndarray) -> np.ndarray:
-         raise NotImplementedError("Child classes must implement the postprocess method")
-
-
- class GuardrailRunner:
-     def __init__(
-         self,
-         safety_models: list[ContentSafetyGuardrail] | None = None,
-         generic_block_msg: str = "",
-         generic_safe_msg: str = "",
-         postprocessors: list[PostprocessingGuardrail] | None = None,
-     ):
-         self.safety_models = safety_models
-         self.generic_block_msg = generic_block_msg
-         self.generic_safe_msg = generic_safe_msg if generic_safe_msg else "Prompt is safe"
-         self.postprocessors = postprocessors
-
-     def run_safety_check(self, input: Any) -> Tuple[bool, str]:
-         """Run the safety check on the input."""
-         if not self.safety_models:
-             log.warning("No safety models found, returning safe")
-             return True, self.generic_safe_msg
-
-         for guardrail in self.safety_models:
-             guardrail_name = str(guardrail.__class__.__name__).upper()
-             log.debug(f"Running guardrail: {guardrail_name}")
-             safe, message = guardrail.is_safe(input)
-             if not safe:
-                 reasoning = self.generic_block_msg if self.generic_block_msg else f"{guardrail_name}: {message}"
-                 return False, reasoning
-         return True, self.generic_safe_msg
-
-     def postprocess(self, frames: np.ndarray) -> np.ndarray:
-         """Run the postprocessing on the video frames."""
-         if not self.postprocessors:
-             log.warning("No postprocessors found, returning original frames")
-             return frames
-
-         for guardrail in self.postprocessors:
-             guardrail_name = str(guardrail.__class__.__name__).upper()
-             log.debug(f"Running guardrail: {guardrail_name}")
-             frames = guardrail.postprocess(frames)
-         return frames
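A minimal sketch of extending these base classes: any object with a compatible `is_safe` can be chained in a `GuardrailRunner`. The length rule below is invented purely for illustration.

```python
from cosmos_transfer1.auxiliary.guardrail.common.core import ContentSafetyGuardrail, GuardrailRunner

class MaxLengthGuardrail(ContentSafetyGuardrail):
    """Toy check: block prompts longer than max_chars (illustrative only)."""

    def __init__(self, max_chars: int = 1024) -> None:
        self.max_chars = max_chars

    def is_safe(self, prompt: str) -> tuple[bool, str]:
        if len(prompt) > self.max_chars:
            return False, f"prompt exceeds {self.max_chars} characters"
        return True, ""

runner = GuardrailRunner(safety_models=[MaxLengthGuardrail()])
safe, message = runner.run_safety_check("a short prompt")
print(safe, message)  # True "Prompt is safe"
```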
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/common/io_utils.py DELETED
@@ -1,78 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import glob
- from dataclasses import dataclass
-
- import imageio
- import numpy as np
-
- from cosmos_transfer1.utils import log
-
-
- @dataclass
- class VideoData:
-     frames: np.ndarray  # Shape: [B, H, W, C]
-     fps: int
-     duration: int  # in seconds
-
-
- def get_video_filepaths(input_dir: str) -> list[str]:
-     """Get a list of filepaths for all videos in the input directory."""
-     paths = glob.glob(f"{input_dir}/**/*.mp4", recursive=True)
-     paths += glob.glob(f"{input_dir}/**/*.avi", recursive=True)
-     paths += glob.glob(f"{input_dir}/**/*.mov", recursive=True)
-     paths = sorted(paths)
-     log.debug(f"Found {len(paths)} videos")
-     return paths
-
-
- def read_video(filepath: str) -> VideoData:
-     """Read a video file and extract its frames and metadata."""
-     try:
-         reader = imageio.get_reader(filepath, "ffmpeg")
-     except Exception as e:
-         raise ValueError(f"Failed to read video file: {filepath}") from e
-
-     # Extract metadata from the video file
-     try:
-         metadata = reader.get_meta_data()
-         fps = metadata.get("fps")
-         duration = metadata.get("duration")
-     except Exception as e:
-         reader.close()
-         raise ValueError(f"Failed to extract metadata from video file: {filepath}") from e
-
-     # Extract frames from the video file
-     try:
-         frames = np.array([frame for frame in reader])
-     except Exception as e:
-         raise ValueError(f"Failed to extract frames from video file: {filepath}") from e
-     finally:
-         reader.close()
-
-     return VideoData(frames=frames, fps=fps, duration=duration)
-
-
- def save_video(filepath: str, frames: np.ndarray, fps: int) -> None:
-     """Save a video file from a sequence of frames."""
-     writer = None  # Guard against get_writer failing before assignment
-     try:
-         writer = imageio.get_writer(filepath, fps=fps, macro_block_size=1)
-         for frame in frames:
-             writer.append_data(frame)
-     except Exception as e:
-         raise ValueError(f"Failed to save video file to {filepath}") from e
-     finally:
-         if writer is not None:
-             writer.close()
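A round-trip sketch with these helpers (paths are illustrative):

```python
from cosmos_transfer1.auxiliary.guardrail.common.io_utils import read_video, save_video

video = read_video("input/sample.mp4")
print(video.frames.shape, video.fps, video.duration)  # (T, H, W, C), frames/s, seconds
save_video("output/sample_copy.mp4", video.frames, video.fps)
```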
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/common/presets.py DELETED
@@ -1,75 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import numpy as np
-
- from cosmos_transfer1.auxiliary.guardrail.blocklist.blocklist import Blocklist
- from cosmos_transfer1.auxiliary.guardrail.common.core import GuardrailRunner
- from cosmos_transfer1.auxiliary.guardrail.face_blur_filter.face_blur_filter import RetinaFaceFilter
- from cosmos_transfer1.auxiliary.guardrail.llamaGuard3.llamaGuard3 import LlamaGuard3
- from cosmos_transfer1.auxiliary.guardrail.video_content_safety_filter.video_content_safety_filter import (
-     VideoContentSafetyFilter,
- )
- from cosmos_transfer1.utils import log
-
-
- def create_text_guardrail_runner(checkpoint_dir: str) -> GuardrailRunner:
-     """Create the text guardrail runner."""
-     return GuardrailRunner(safety_models=[Blocklist(checkpoint_dir), LlamaGuard3(checkpoint_dir)])
-
-
- def create_video_guardrail_runner(checkpoint_dir: str) -> GuardrailRunner:
-     """Create the video guardrail runner."""
-     return GuardrailRunner(
-         safety_models=[VideoContentSafetyFilter(checkpoint_dir)],
-         postprocessors=[RetinaFaceFilter(checkpoint_dir)],
-     )
-
-
- def run_text_guardrail(prompt: str, guardrail_runner: GuardrailRunner) -> bool:
-     """Run the text guardrail on the prompt, checking for content safety.
-
-     Args:
-         prompt: The text prompt.
-         guardrail_runner: The text guardrail runner.
-
-     Returns:
-         bool: Whether the prompt is safe.
-     """
-     is_safe, message = guardrail_runner.run_safety_check(prompt)
-     if not is_safe:
-         log.critical(f"GUARDRAIL BLOCKED: {message}")
-     return is_safe
-
-
- def run_video_guardrail(frames: np.ndarray, guardrail_runner: GuardrailRunner) -> np.ndarray | None:
-     """Run the video guardrail on the frames, checking for content safety and applying face blur.
-
-     Args:
-         frames: The frames of the generated video.
-         guardrail_runner: The video guardrail runner.
-
-     Returns:
-         The processed frames if safe, otherwise None.
-     """
-     is_safe, message = guardrail_runner.run_safety_check(frames)
-     if not is_safe:
-         log.critical(f"GUARDRAIL BLOCKED: {message}")
-         return None
-
-     frames = guardrail_runner.postprocess(frames)
-     return frames
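Putting the two runners together around a generation step might look like the sketch below; the random frames stand in for actual model output, and the checkpoint layout is assumed.

```python
import numpy as np

from cosmos_transfer1.auxiliary.guardrail.common.presets import (
    create_text_guardrail_runner,
    create_video_guardrail_runner,
    run_text_guardrail,
    run_video_guardrail,
)

checkpoint_dir = "checkpoints"  # assumed to hold nvidia/Cosmos-Guardrail1/... and Llama Guard weights
text_runner = create_text_guardrail_runner(checkpoint_dir)
video_runner = create_video_guardrail_runner(checkpoint_dir)

prompt = "A robot arm stacking boxes in a warehouse."
if run_text_guardrail(prompt, text_runner):
    # Stand-in for generated frames: RGB uint8 with shape [T, H, W, C].
    frames = np.random.randint(0, 255, (8, 480, 640, 3), dtype=np.uint8)
    frames = run_video_guardrail(frames, video_runner)
    if frames is None:
        print("Video blocked by the content safety filter.")
```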
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/face_blur_filter/__init__.py DELETED
@@ -1,14 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/face_blur_filter/blur_utils.py DELETED
@@ -1,35 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import cv2
- import numpy as np
-
-
- def pixelate_face(face_img: np.ndarray, blocks: int = 5) -> np.ndarray:
-     """
-     Pixelate a face region by reducing resolution and then upscaling.
-
-     Args:
-         face_img: Face region to pixelate
-         blocks: Number of blocks to divide the face into (in each dimension)
-
-     Returns:
-         Pixelated face region
-     """
-     h, w = face_img.shape[:2]
-     # Shrink the image and scale back up to create the pixelation effect
-     temp = cv2.resize(face_img, (blocks, blocks), interpolation=cv2.INTER_LINEAR)
-     pixelated = cv2.resize(temp, (w, h), interpolation=cv2.INTER_NEAREST)
-     return pixelated
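A quick demo of the pixelation primitive on a synthetic crop:

```python
import numpy as np
from cosmos_transfer1.auxiliary.guardrail.face_blur_filter.blur_utils import pixelate_face

face = np.random.randint(0, 255, (64, 48, 3), dtype=np.uint8)  # synthetic face crop
out = pixelate_face(face, blocks=5)
assert out.shape == face.shape  # original resolution restored, detail removed
```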
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/face_blur_filter/face_blur_filter.py DELETED
@@ -1,225 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import argparse
- import os
-
- import numpy as np
- import torch
- from retinaface.data import cfg_re50
- from retinaface.layers.functions.prior_box import PriorBox
- from retinaface.models.retinaface import RetinaFace
- from torch.utils.data import DataLoader, TensorDataset
- from tqdm import tqdm
-
- from cosmos_transfer1.auxiliary.guardrail.common.core import GuardrailRunner, PostprocessingGuardrail
- from cosmos_transfer1.auxiliary.guardrail.common.io_utils import get_video_filepaths, read_video, save_video
- from cosmos_transfer1.auxiliary.guardrail.face_blur_filter.blur_utils import pixelate_face
- from cosmos_transfer1.auxiliary.guardrail.face_blur_filter.retinaface_utils import (
-     decode_batch,
-     filter_detected_boxes,
-     load_model,
- )
- from cosmos_transfer1.utils import log, misc
-
- # RetinaFace model constants from https://github.com/biubug6/Pytorch_Retinaface/blob/master/detect.py
- TOP_K = 5_000
- KEEP_TOP_K = 750
- NMS_THRESHOLD = 0.4
-
-
- class RetinaFaceFilter(PostprocessingGuardrail):
-     def __init__(
-         self,
-         checkpoint_dir: str,
-         batch_size: int = 1,
-         confidence_threshold: float = 0.7,
-         device="cuda" if torch.cuda.is_available() else "cpu",
-     ) -> None:
-         """
-         Initialize the RetinaFace model for face detection and blurring.
-
-         Args:
-             checkpoint_dir: Path to the checkpoint folder containing the RetinaFace weights
-             batch_size: Batch size for RetinaFace inference and processing
-             confidence_threshold: Minimum confidence score to consider a face detection
-             device: Device to run inference on ("cuda" or "cpu")
-         """
-         self.checkpoint = f"{checkpoint_dir}/nvidia/Cosmos-Guardrail1/face_blur_filter/Resnet50_Final.pth"
-         self.cfg = cfg_re50
-         self.batch_size = batch_size
-         self.confidence_threshold = confidence_threshold
-         self.device = device
-         self.dtype = torch.float32
-
-         # Disable loading ResNet pretrained weights
-         self.cfg["pretrain"] = False
-         self.net = RetinaFace(cfg=self.cfg, phase="test")
-         cpu = self.device == "cpu"
-
-         # Load from the RetinaFace pretrained checkpoint
-         self.net = load_model(self.net, self.checkpoint, cpu)
-         self.net.to(self.device, dtype=self.dtype).eval()
-
-     def preprocess_frames(self, frames: np.ndarray) -> torch.Tensor:
-         """Preprocess a sequence of frames for face detection.
-
-         Args:
-             frames: Input frames
-
-         Returns:
-             Preprocessed frames tensor
-         """
-         with torch.no_grad():
-             frames_tensor = torch.from_numpy(frames).to(self.device, dtype=self.dtype)  # Shape: [T, H, W, C]
-             frames_tensor = frames_tensor.permute(0, 3, 1, 2)  # Shape: [T, C, H, W]
-             frames_tensor = frames_tensor[:, [2, 1, 0], :, :]  # RGB to BGR to match RetinaFace model input
-             means = torch.tensor([104.0, 117.0, 123.0], device=self.device, dtype=self.dtype).view(1, 3, 1, 1)
-             frames_tensor = frames_tensor - means  # Subtract mean BGR values for each channel
-             return frames_tensor
-
-     def blur_detected_faces(
-         self,
-         frames: np.ndarray,
-         batch_loc: torch.Tensor,
-         batch_conf: torch.Tensor,
-         prior_data: torch.Tensor,
-         scale: torch.Tensor,
-         min_size: tuple[int, int] = (20, 20),
-     ) -> list[np.ndarray]:
-         """Blur detected faces in a batch of frames using RetinaFace predictions.
-
-         Args:
-             frames: Input frames
-             batch_loc: Batched location predictions
-             batch_conf: Batched confidence scores
-             prior_data: Prior boxes for the video
-             scale: Scale factor for resizing detections
-             min_size: Minimum size of a detected face region in pixels
-
-         Returns:
-             Processed frames with pixelated faces
-         """
-         with torch.no_grad():
-             batch_boxes = decode_batch(batch_loc, prior_data, self.cfg["variance"])
-             batch_boxes = batch_boxes * scale
-
-         blurred_frames = []
-         for i, boxes in enumerate(batch_boxes):
-             boxes = boxes.detach().cpu().numpy()
-             scores = batch_conf[i, :, 1].detach().cpu().numpy()
-
-             filtered_boxes = filter_detected_boxes(
-                 boxes,
-                 scores,
-                 confidence_threshold=self.confidence_threshold,
-                 nms_threshold=NMS_THRESHOLD,
-                 top_k=TOP_K,
-                 keep_top_k=KEEP_TOP_K,
-             )
-
-             frame = frames[i]
-             for box in filtered_boxes:
-                 x1, y1, x2, y2 = map(int, box)
-                 # Ignore bounding boxes smaller than the minimum size
-                 if x2 - x1 < min_size[0] or y2 - y1 < min_size[1]:
-                     continue
-                 max_h, max_w = frame.shape[:2]
-                 face_roi = frame[max(y1, 0) : min(y2, max_h), max(x1, 0) : min(x2, max_w)]
-                 blurred_face = pixelate_face(face_roi)
-                 frame[max(y1, 0) : min(y2, max_h), max(x1, 0) : min(x2, max_w)] = blurred_face
-             blurred_frames.append(frame)
-
-         return blurred_frames
-
-     def postprocess(self, frames: np.ndarray) -> np.ndarray:
-         """Blur faces in a sequence of frames.
-
-         Args:
-             frames: Input frames
-
-         Returns:
-             Processed frames with pixelated faces
-         """
-         # Create dataset and dataloader
-         frames_tensor = self.preprocess_frames(frames)
-         dataset = TensorDataset(frames_tensor)
-         dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=False)
-         processed_batches = []
-
-         prior_data, scale = None, None
-         for i, batch in enumerate(dataloader):
-             batch = batch[0]
-             h, w = batch.shape[-2:]  # Batch shape: [C, H, W]
-
-             with torch.no_grad():
-                 # Generate priors for the video
-                 if prior_data is None:
-                     priorbox = PriorBox(self.cfg, image_size=(h, w))
-                     priors = priorbox.forward()
-                     priors = priors.to(self.device, dtype=self.dtype)
-                     prior_data = priors.data
-
-                 # Get scale for resizing detections
-                 if scale is None:
-                     scale = torch.Tensor([w, h, w, h])
-                     scale = scale.to(self.device, dtype=self.dtype)
-
-                 batch_loc, batch_conf, _ = self.net(batch)
-
-             # Blur detected faces in each batch of frames
-             start_idx = i * self.batch_size
-             end_idx = min(start_idx + self.batch_size, len(frames))
-             processed_batches.append(
-                 self.blur_detected_faces(frames[start_idx:end_idx], batch_loc, batch_conf, prior_data, scale)
-             )
-
-         processed_frames = [frame for batch in processed_batches for frame in batch]
-         return np.array(processed_frames)
-
-
- def parse_args():
-     parser = argparse.ArgumentParser()
-     parser.add_argument("--input_dir", type=str, required=True, help="Path containing input videos")
-     parser.add_argument("--output_dir", type=str, required=True, help="Path for saving processed videos")
-     parser.add_argument(
-         "--checkpoint_dir",
-         type=str,
-         help="Path to the checkpoint folder containing the RetinaFace weights",
-     )
-     return parser.parse_args()
-
-
- def main(args):
-     filepaths = get_video_filepaths(args.input_dir)
-     if not filepaths:
-         log.error(f"No video files found in directory: {args.input_dir}")
-         return
-
-     face_blur = RetinaFaceFilter(checkpoint_dir=args.checkpoint_dir)
-     postprocessing_runner = GuardrailRunner(postprocessors=[face_blur])
-     os.makedirs(args.output_dir, exist_ok=True)
-
-     for filepath in tqdm(filepaths):
-         video_data = read_video(filepath)
-         with misc.timer("face blur filter"):
-             frames = postprocessing_runner.postprocess(video_data.frames)
-
-         output_path = os.path.join(args.output_dir, os.path.basename(filepath))
-         save_video(output_path, frames, video_data.fps)
-
-
- if __name__ == "__main__":
-     args = parse_args()
-     main(args)
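A programmatic sketch of running just the face-blur postprocessor on frames already in memory (paths and checkpoint layout assumed):

```python
from cosmos_transfer1.auxiliary.guardrail.common.core import GuardrailRunner
from cosmos_transfer1.auxiliary.guardrail.common.io_utils import read_video, save_video
from cosmos_transfer1.auxiliary.guardrail.face_blur_filter.face_blur_filter import RetinaFaceFilter

# Expects checkpoints/nvidia/Cosmos-Guardrail1/face_blur_filter/Resnet50_Final.pth to exist.
runner = GuardrailRunner(postprocessors=[RetinaFaceFilter(checkpoint_dir="checkpoints")])
video = read_video("input/people.mp4")  # illustrative path
blurred = runner.postprocess(video.frames)
save_video("output/people_blurred.mp4", blurred, video.fps)
```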
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/face_blur_filter/retinaface_utils.py DELETED
@@ -1,117 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import numpy as np
- import torch
- from retinaface.utils.nms.py_cpu_nms import py_cpu_nms
-
- from cosmos_transfer1.utils import log
-
-
- # Adapted from https://github.com/biubug6/Pytorch_Retinaface/blob/master/detect.py
- def filter_detected_boxes(boxes, scores, confidence_threshold, nms_threshold, top_k, keep_top_k):
-     """Filter boxes based on confidence score and remove overlapping boxes using NMS."""
-     # Keep detections with confidence above threshold
-     inds = np.where(scores > confidence_threshold)[0]
-     boxes = boxes[inds]
-     scores = scores[inds]
-
-     # Sort by confidence and keep top K detections
-     order = scores.argsort()[::-1][:top_k]
-     boxes = boxes[order]
-     scores = scores[order]
-
-     # Run non-maximum-suppression (NMS) to remove overlapping boxes
-     dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
-     keep = py_cpu_nms(dets, nms_threshold)
-     dets = dets[keep, :]
-     dets = dets[:keep_top_k, :]
-     boxes = dets[:, :-1]
-     return boxes
-
-
- # Adapted from https://github.com/biubug6/Pytorch_Retinaface/blob/master/utils/box_utils.py to handle batched inputs
- def decode_batch(loc, priors, variances):
-     """Decode batched locations from predictions using priors and variances.
-
-     Args:
-         loc (tensor): Batched location predictions for loc layers.
-             Shape: [batch_size, num_priors, 4]
-         priors (tensor): Prior boxes in center-offset form.
-             Shape: [num_priors, 4]
-         variances (list[float]): Variances of prior boxes.
-
-     Return:
-         Decoded batched bounding box predictions
-             Shape: [batch_size, num_priors, 4]
-     """
-     batch_size = loc.size(0)
-     priors = priors.unsqueeze(0).expand(batch_size, -1, -1)
-
-     boxes = torch.cat(
-         (
-             priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:],
-             priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1]),
-         ),
-         dim=2,
-     )
-
-     boxes[:, :, :2] -= boxes[:, :, 2:] / 2
-     boxes[:, :, 2:] += boxes[:, :, :2]
-     return boxes
-
-
- # Adapted from https://github.com/biubug6/Pytorch_Retinaface/blob/master/detect.py
- def _check_keys(model, pretrained_state_dict):
-     ckpt_keys = set(pretrained_state_dict.keys())
-     model_keys = set(model.state_dict().keys())
-     used_pretrained_keys = model_keys & ckpt_keys
-     unused_pretrained_keys = ckpt_keys - model_keys
-     missing_keys = model_keys - ckpt_keys
-     log.debug("Missing keys:{}".format(len(missing_keys)))
-     log.debug("Unused checkpoint keys:{}".format(len(unused_pretrained_keys)))
-     log.debug("Used keys:{}".format(len(used_pretrained_keys)))
-     assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint"
-     return True
-
-
- # Adapted from https://github.com/biubug6/Pytorch_Retinaface/blob/master/detect.py
- def _remove_prefix(state_dict, prefix):
-     """Old versions of the model are stored with all parameter names sharing the common prefix 'module.'"""
-     log.debug("Removing prefix '{}'".format(prefix))
-
-     def f(x):
-         return x.split(prefix, 1)[-1] if x.startswith(prefix) else x
-
-     return {f(key): value for key, value in state_dict.items()}
-
-
- # Adapted from https://github.com/biubug6/Pytorch_Retinaface/blob/master/detect.py
- def load_model(model, pretrained_path, load_to_cpu):
-     log.debug("Loading pretrained model from {}".format(pretrained_path))
-     if load_to_cpu:
-         pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage, weights_only=True)
-     else:
-         device = torch.cuda.current_device()
-         pretrained_dict = torch.load(
-             pretrained_path, map_location=lambda storage, loc: storage.cuda(device), weights_only=True
-         )
-     if "state_dict" in pretrained_dict.keys():
-         pretrained_dict = _remove_prefix(pretrained_dict["state_dict"], "module.")
-     else:
-         pretrained_dict = _remove_prefix(pretrained_dict, "module.")
-     _check_keys(model, pretrained_dict)
-     model.load_state_dict(pretrained_dict, strict=False)
-     return model
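A shape-only sanity check for `decode_batch` with dummy tensors; `cfg_re50` uses variances `[0.1, 0.2]`, and the values here are random rather than meaningful detections:

```python
import torch
from cosmos_transfer1.auxiliary.guardrail.face_blur_filter.retinaface_utils import decode_batch

batch_size, num_priors = 2, 16800
loc = torch.randn(batch_size, num_priors, 4)  # dummy location predictions
priors = torch.rand(num_priors, 4)            # center-offset form: (cx, cy, w, h)
boxes = decode_batch(loc, priors, variances=[0.1, 0.2])
print(boxes.shape)  # torch.Size([2, 16800, 4]) as (x1, y1, x2, y2)
```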
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/llamaGuard3/__init__.py DELETED
@@ -1,14 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/llamaGuard3/categories.py DELETED
@@ -1,31 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- UNSAFE_CATEGORIES = {
-     "S1": "Violent Crimes.",
-     "S2": "Non-Violent Crimes.",
-     "S3": "Sex Crimes.",
-     "S4": "Child Exploitation.",
-     "S5": "Defamation.",
-     "S6": "Specialized Advice.",
-     "S7": "Privacy.",
-     "S8": "Intellectual Property.",
-     "S9": "Indiscriminate Weapons.",
-     "S10": "Hate.",
-     "S11": "Self-Harm.",
-     "S12": "Sexual Content.",
-     "S13": "Elections.",
-     "S14": "Code Interpreter Abuse.",
- }
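A small sketch of turning the category codes from a raw `unsafe` / `S1,S10`-style verdict into readable labels:

```python
from cosmos_transfer1.auxiliary.guardrail.llamaGuard3.categories import UNSAFE_CATEGORIES

detected = "S1,S10"  # illustrative model verdict line
labels = [UNSAFE_CATEGORIES.get(code.strip(), "Unknown") for code in detected.split(",")]
print(labels)  # ['Violent Crimes.', 'Hate.']
```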
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/llamaGuard3/llamaGuard3.py DELETED
@@ -1,122 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import argparse
- import os
-
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
-
- from cosmos_transfer1.auxiliary.guardrail.common.core import ContentSafetyGuardrail, GuardrailRunner
- from cosmos_transfer1.auxiliary.guardrail.llamaGuard3.categories import UNSAFE_CATEGORIES
- from cosmos_transfer1.utils import log, misc
-
- SAFE = misc.Color.green("SAFE")
- UNSAFE = misc.Color.red("UNSAFE")
-
-
- class LlamaGuard3(ContentSafetyGuardrail):
-     def __init__(
-         self,
-         checkpoint_dir: str,
-         device="cuda" if torch.cuda.is_available() else "cpu",
-     ) -> None:
-         self.checkpoint_dir = checkpoint_dir
-         self.device = device
-         self.dtype = torch.bfloat16
-
-         model_id = "meta-llama/Llama-Guard-3-8B"
-         model_dir = os.path.join(self.checkpoint_dir, model_id)
-
-         self.model = AutoModelForCausalLM.from_pretrained(model_dir)
-         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
-
-         self.model.to(self.device, dtype=self.dtype).eval()
-
-     def get_llamaGuard3_block_message(self, moderation_output: str) -> str:
-         """Extract the blocked category from the Llama Guard 3 model output."""
-         block_msg = "Prompt blocked by Llama Guard 3."
-         try:
-             lines = moderation_output.splitlines()
-             categories_detected = []
-             for line in lines[1:]:
-                 line_stripped = line.split("<|eot_id|>")[0].strip()
-                 for category in line_stripped.split(","):
-                     category = category.strip()
-                     if category not in UNSAFE_CATEGORIES:
-                         log.warning(f"Unrecognized category from moderation output: {category}")
-                     else:
-                         categories_detected.append(category)
-             if len(categories_detected) > 0:
-                 blocked_categories = ", ".join([UNSAFE_CATEGORIES[category][:-1] for category in categories_detected])
-                 block_msg = f"{block_msg} Violations: {blocked_categories}."
-         except Exception as e:
-             log.warning(f"Unable to extract blocked category from Llama Guard 3 output: {e}")
-         return block_msg
-
-     def filter_llamaGuard3_output(self, prompt: str) -> tuple[bool, str]:
-         """Filter the Llama Guard 3 model output and return the safety status and message."""
-         conversation = [{"role": "user", "content": prompt}]
-         input_ids = self.tokenizer.apply_chat_template(
-             conversation, categories=UNSAFE_CATEGORIES, return_tensors="pt"
-         ).to(self.device)
-         prompt_len = input_ids.shape[1]
-         output = self.model.generate(
-             input_ids=input_ids,
-             max_new_tokens=100,
-             return_dict_in_generate=True,
-             pad_token_id=0,
-         )
-         generated_tokens = output.sequences[:, prompt_len:]
-         moderation_output = self.tokenizer.decode(generated_tokens[0], skip_special_tokens=False).strip()
-
-         if "unsafe" in moderation_output.lower():
-             block_msg = self.get_llamaGuard3_block_message(moderation_output)
-             return False, block_msg
-         else:
-             return True, ""
-
-     def is_safe(self, prompt: str) -> tuple[bool, str]:
-         """Check if the input prompt is safe according to the Llama Guard 3 model."""
-         try:
-             return self.filter_llamaGuard3_output(prompt)
-         except Exception as e:
-             log.error(f"Unexpected error occurred when running Llama Guard 3 guardrail: {e}")
-             return True, "Unexpected error occurred when running Llama Guard 3 guardrail."
-
-
- def parse_args():
-     parser = argparse.ArgumentParser()
-     parser.add_argument("--prompt", type=str, required=True, help="Input prompt")
-     parser.add_argument(
-         "--checkpoint_dir",
-         type=str,
-         help="Path to the Llama Guard 3 checkpoint folder",
-     )
-     return parser.parse_args()
-
-
- def main(args):
-     llamaGuard3 = LlamaGuard3(checkpoint_dir=args.checkpoint_dir)
-     runner = GuardrailRunner(safety_models=[llamaGuard3])
-     with misc.timer("Llama Guard 3 safety check"):
-         safety, message = runner.run_safety_check(args.prompt)
-     log.info(f"Input is: {'SAFE' if safety else 'UNSAFE'}")
-     if not safety:
-         log.info(f"Message: {message}")
-
-
- if __name__ == "__main__":
-     args = parse_args()
-     main(args)
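Direct use of the wrapper, assuming the Llama-Guard-3-8B weights sit under the checkpoint folder and a GPU is available:

```python
from cosmos_transfer1.auxiliary.guardrail.llamaGuard3.llamaGuard3 import LlamaGuard3

guard = LlamaGuard3(checkpoint_dir="checkpoints")  # expects meta-llama/Llama-Guard-3-8B inside
safe, message = guard.is_safe("Describe a sunny day at the beach.")
print(safe, message)
```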
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/video_content_safety_filter/__init__.py DELETED
@@ -1,14 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/video_content_safety_filter/model.py DELETED
@@ -1,60 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import attrs
- import torch
- import torch.nn as nn
-
- from cosmos_transfer1.utils.ddp_config import make_freezable
-
-
- @make_freezable
- @attrs.define(slots=False)
- class ModelConfig:
-     input_size: int = 1152
-     num_classes: int = 7
-
-
- class SafetyClassifier(nn.Module):
-     def __init__(self, input_size: int = 1024, num_classes: int = 2):
-         super().__init__()
-         self.input_size = input_size
-         self.num_classes = num_classes
-         self.layers = nn.Sequential(
-             nn.Linear(self.input_size, 512),
-             nn.BatchNorm1d(512),
-             nn.ReLU(),
-             nn.Linear(512, 256),
-             nn.BatchNorm1d(256),
-             nn.ReLU(),
-             nn.Linear(256, self.num_classes),
-             # Note: No activation function here; CrossEntropyLoss expects raw logits
-         )
-
-     def forward(self, x):
-         return self.layers(x)
-
-
- class VideoSafetyModel(nn.Module):
-     def __init__(self, config: ModelConfig) -> None:
-         super().__init__()
-         self.config = config
-         self.num_classes = config.num_classes
-         self.network = SafetyClassifier(input_size=config.input_size, num_classes=self.num_classes)
-
-     @torch.inference_mode()
-     def forward(self, data_batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
-         logits = self.network(data_batch["data"].cuda())
-         return {"logits": logits}
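A quick shape check of the classifier head with a dummy SigLIP embedding; note that `forward` moves its input to CUDA itself, so this sketch assumes a GPU:

```python
import torch
from cosmos_transfer1.auxiliary.guardrail.video_content_safety_filter.model import ModelConfig, VideoSafetyModel

model = VideoSafetyModel(ModelConfig(input_size=1152, num_classes=7)).cuda().eval()
batch = {"data": torch.randn(4, 1152)}  # four dummy frame embeddings
out = model(batch)                      # forward() sends the tensor to CUDA itself
print(out["logits"].shape)              # torch.Size([4, 7])
```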
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/video_content_safety_filter/video_content_safety_filter.py DELETED
@@ -1,185 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import argparse
- import json
- import os
- from typing import Iterable, Tuple, Union
-
- import torch
- from PIL import Image
-
- from cosmos_transfer1.auxiliary.guardrail.common.core import ContentSafetyGuardrail, GuardrailRunner
- from cosmos_transfer1.auxiliary.guardrail.common.io_utils import get_video_filepaths, read_video
- from cosmos_transfer1.auxiliary.guardrail.video_content_safety_filter.model import ModelConfig, VideoSafetyModel
- from cosmos_transfer1.auxiliary.guardrail.video_content_safety_filter.vision_encoder import SigLIPEncoder
- from cosmos_transfer1.utils import log, misc
-
- # Define the class index to class name mapping for multi-class classification
- CLASS_IDX_TO_NAME = {
-     0: "Safe",
-     1: "Sexual_Content",
-     3: "Drugs",
-     4: "Child_Abuse",
-     5: "Hate_and_Harassment",
-     6: "Self-Harm",
- }
-
-
- class VideoContentSafetyFilter(ContentSafetyGuardrail):
-     def __init__(
-         self,
-         checkpoint_dir: str,
-         device="cuda" if torch.cuda.is_available() else "cpu",
-     ) -> None:
-         self.checkpoint_dir = os.path.join(checkpoint_dir, "nvidia/Cosmos-Guardrail1/video_content_safety_filter")
-         self.device = device
-         self.dtype = torch.float32
-
-         # Initialize the SigLIP encoder
-         self.encoder = SigLIPEncoder(checkpoint_dir=self.checkpoint_dir, device=device, dtype=self.dtype)
-
-         # Use ModelConfig directly for inference configuration
-         model_config = ModelConfig(input_size=1152, num_classes=7)
-
-         # Load the multi-class classifier
-         self.model = VideoSafetyModel(model_config)
-         safety_filter_local_path = os.path.join(self.checkpoint_dir, "safety_filter.pt")
-         checkpoint = torch.load(safety_filter_local_path, map_location=torch.device("cpu"), weights_only=True)
-         self.model.load_state_dict(checkpoint["model"])
-         self.model.to(self.device, dtype=self.dtype).eval()
-
-     @torch.inference_mode()
-     def __infer(self, pil_image: Image.Image) -> int:
-         """Infer the class of the image."""
-         image_embs = self.encoder.encode_image(pil_image)
-         logits = self.model.network(image_embs)
-         probabilities = torch.nn.functional.softmax(logits, dim=-1)
-         predicted_class = torch.argmax(probabilities, dim=-1).item()
-         return predicted_class
-
-     def is_safe_file(self, filepath: str) -> bool:
-         """Check if the video file is safe."""
-         video_data = read_video(filepath)
-
-         # Sample frames at 2 FPS
-         sample_rate = 2  # frames per second
-         frame_interval = max(1, int(video_data.fps / sample_rate))
-         frame_numbers = list(range(0, int(video_data.fps * video_data.duration), frame_interval))
-
-         is_safe = True
-         frame_scores = []
-
-         for frame_number in frame_numbers:
-             try:
-                 frame = video_data.frames[frame_number]
-                 pil_image = Image.fromarray(frame)
-                 predicted_class = self.__infer(pil_image)
-                 class_name = CLASS_IDX_TO_NAME.get(predicted_class, "Safe")
-                 frame_scores.append({"frame_number": frame_number, "class": class_name})
-
-                 # If any frame is not "Safe", mark the video as unsafe
-                 if class_name != "Safe":
-                     is_safe = False
-                     break
-
-             except Exception as e:
-                 log.warning(f"Failed to run safety classifier on frame_number {frame_number}. Exception: {e}")
-                 continue
-
-         # Prepare a summary for JSON logging (named to avoid shadowing the VideoData instance)
-         video_summary = {
-             "filepath": filepath,
-             "is_safe": is_safe,
-             "video_length": video_data.duration,
-             "fps": video_data.fps,
-             "frame_scores": frame_scores,
-         }
-
-         log.info(f"Video {filepath} is {'SAFE' if is_safe else 'UNSAFE'}.")
-         log.debug(f"Video data: {json.dumps(video_summary, indent=4)}")
-         return is_safe
-
-     def is_safe_frames(self, frames: Iterable) -> bool:
-         """Check if the generated video frames are safe."""
-         frame_scores = []
-         total_frames = 0
-         safe_frames = 0
-
-         for frame_number, frame in enumerate(frames):
-             try:
-                 total_frames += 1
-                 pil_image = Image.fromarray(frame)
-                 predicted_class = self.__infer(pil_image)
-                 class_name = CLASS_IDX_TO_NAME.get(predicted_class, "Safe")
-                 frame_scores.append({"frame_number": frame_number, "class": class_name})
-
-                 if class_name == "Safe":
-                     safe_frames += 1
-
-             except Exception as e:
-                 log.warning(f"Failed to run safety classifier on frame_number {frame_number}. Exception: {e}")
-                 continue
-
-         # Decide if the video is safe based on the ratio of safe frames
-         is_safe = False
-         if total_frames > 0:
-             is_safe = (safe_frames / total_frames) >= 0.95
-
-         frames_summary = {
-             "is_safe": is_safe,
-             "frame_scores": frame_scores,
-         }
-
-         log.debug(f"Frames data: {json.dumps(frames_summary, indent=4)}")
-         return is_safe
-
-     def is_safe(self, input: Union[str, Iterable]) -> Tuple[bool, str]:
-         if isinstance(input, str):
-             is_safe = self.is_safe_file(input)
-             return is_safe, "safe video detected" if is_safe else "unsafe video detected"
-         else:
-             is_safe = self.is_safe_frames(input)
-             return is_safe, "safe frames detected" if is_safe else "unsafe frames detected"
-
-
- def parse_args():
-     parser = argparse.ArgumentParser()
-     parser.add_argument("--input_dir", type=str, required=True, help="Path containing input videos")
-     parser.add_argument(
-         "--checkpoint_dir",
-         type=str,
-         help="Path to the Video Content Safety Filter checkpoint folder",
-     )
-     return parser.parse_args()
-
-
- def main(args):
-     filepaths = get_video_filepaths(args.input_dir)
-     if not filepaths:
-         log.error(f"No video files found in directory: {args.input_dir}")
-         return
-
-     video_filter = VideoContentSafetyFilter(checkpoint_dir=args.checkpoint_dir)
-     runner = GuardrailRunner(safety_models=[video_filter], generic_safe_msg="Video is safe")
-
-     for filepath in filepaths:
-         with misc.timer("video content safety filter"):
-             _ = runner.run_safety_check(filepath)
-
-
- if __name__ == "__main__":
-     args = parse_args()
-     main(args)
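Checking in-memory frames directly (RGB uint8, iterated along the first axis); the random frames here only demonstrate the call shape, and the Cosmos-Guardrail1 checkpoints must be present:

```python
import numpy as np
from cosmos_transfer1.auxiliary.guardrail.video_content_safety_filter.video_content_safety_filter import (
    VideoContentSafetyFilter,
)

video_filter = VideoContentSafetyFilter(checkpoint_dir="checkpoints")
frames = np.random.randint(0, 255, (16, 256, 256, 3), dtype=np.uint8)
safe, message = video_filter.is_safe(frames)
print(safe, message)  # unsafe if fewer than 95% of frames classify as "Safe"
```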
cosmos-transfer1/cosmos_transfer1/auxiliary/guardrail/video_content_safety_filter/vision_encoder.py DELETED
@@ -1,46 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import os
17
-
18
- import torch
19
- from PIL import Image
20
- from transformers import SiglipModel, SiglipProcessor
21
-
22
-
23
- class SigLIPEncoder(torch.nn.Module):
24
- def __init__(
25
- self,
26
- checkpoint_dir: str,
27
- model_name: str = "google/siglip-so400m-patch14-384",
28
- device="cuda" if torch.cuda.is_available() else "cpu",
29
- dtype=torch.float32,
30
- ) -> None:
31
- super().__init__()
32
- self.checkpoint_dir = checkpoint_dir
33
- self.device = device
34
- self.dtype = dtype
35
- self.model = SiglipModel.from_pretrained(model_name, cache_dir=self.checkpoint_dir)
36
- self.processor = SiglipProcessor.from_pretrained(model_name, cache_dir=self.checkpoint_dir)
37
- self.model.to(self.device, dtype=self.dtype).eval()
38
-
39
- @torch.inference_mode()
40
- def encode_image(self, input_img: Image.Image) -> torch.Tensor:
41
- """Encode an image into a feature vector."""
42
- with torch.no_grad():
43
- inputs = self.processor(images=input_img, return_tensors="pt").to(self.device, dtype=self.dtype)
44
- image_features = self.model.get_image_features(**inputs)
45
- image_features /= image_features.norm(dim=-1, keepdim=True)
46
- return image_features
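
A small sketch of how the deleted `SigLIPEncoder` can be used (untested; image paths are placeholders and the import path follows the repo layout). Because `encode_image` L2-normalizes its output, the cosine similarity of two images reduces to a plain dot product:

```python
# Sketch (untested): comparing two images with SigLIPEncoder.
from PIL import Image

from cosmos_transfer1.auxiliary.guardrail.video_content_safety_filter.vision_encoder import SigLIPEncoder

encoder = SigLIPEncoder(checkpoint_dir="./checkpoints")  # SigLIP weights are cached here
feat_a = encoder.encode_image(Image.open("a.jpg").convert("RGB"))  # shape (1, D), unit norm
feat_b = encoder.encode_image(Image.open("b.jpg").convert("RGB"))
cosine_similarity = (feat_a @ feat_b.T).item()
```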
cosmos-transfer1/cosmos_transfer1/auxiliary/human_keypoint/human_keypoint.py DELETED
@@ -1,155 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import os
17
-
18
- import cv2
19
- import numpy as np
20
- from rtmlib import Wholebody
21
-
22
- from cosmos_transfer1.diffusion.datasets.augmentors.human_keypoint_utils import (
23
- coco_wholebody_133_skeleton,
24
- openpose134_skeleton,
25
- )
26
- from cosmos_transfer1.utils import log
27
-
28
-
29
- class HumanKeypointModel:
30
- def __init__(self, to_openpose=True, conf_thres=0.6):
31
- self.model = Wholebody(
32
- to_openpose=to_openpose,
33
- mode="performance",
34
- backend="onnxruntime",
35
- device="cuda",
36
- )
37
- self.to_openpose = to_openpose
38
- self.conf_thres = conf_thres
39
-
40
- def __call__(self, input_video: str, output_video: str = "keypoint.mp4") -> str:
41
- """
42
- Generate the human body keypoint plot for the keypointControlNet video2world model.
43
- Input: mp4 video
44
- Output: mp4 keypoint video, of the same spatial and temporal dimensions as the input video.
45
- """
46
-
47
- log.info(f"Processing video: {input_video} to generate keypoint video: {output_video}")
48
- assert os.path.exists(input_video)
49
-
50
- cap = cv2.VideoCapture(input_video)
51
- fps = int(cap.get(cv2.CAP_PROP_FPS))
52
- frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
53
- frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
54
- frame_size = (frame_width, frame_height)
55
-
56
- # vid writer
57
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
58
- skeleton_writer = cv2.VideoWriter(output_video, fourcc, fps, frame_size)
59
-
60
- log.info(f"frame width: {frame_width}, frame height: {frame_height}, fps: {fps}")
61
- log.info("start pose estimation for frames..")
62
-
63
- # Process each frame
64
- while cap.isOpened():
65
- ret, frame = cap.read()
66
- if not ret:
67
- break
68
-
69
- # Create a black background frame
70
- black_frame = np.zeros_like(frame)
71
-
72
- # Run pose estimation
73
- keypoints, scores = self.model(frame)
74
-
75
- if keypoints is not None and len(keypoints) > 0:
76
- skeleton_frame = self.plot_person_kpts(
77
- black_frame,
78
- keypoints,
79
- scores,
80
- kpt_thr=self.conf_thres,
81
- openpose_format=True,
82
- line_width=4,
83
- ) # (h, w, 3)
84
- else:
85
- skeleton_frame = black_frame
86
-
87
- skeleton_writer.write(skeleton_frame[:, :, ::-1])
88
-
89
- cap.release()
90
- skeleton_writer.release()
- return output_video
91
-
92
- def draw_skeleton(
93
- self,
94
- img: np.ndarray,
95
- keypoints: np.ndarray,
96
- scores: np.ndarray,
97
- kpt_thr: float = 0.6,
98
- openpose_format: bool = True,
99
- radius: int = 2,
100
- line_width: int = 4,
101
- ):
102
- skeleton_topology = openpose134_skeleton if openpose_format else coco_wholebody_133_skeleton
103
- assert len(keypoints.shape) == 2
104
- keypoint_info, skeleton_info = (
105
- skeleton_topology["keypoint_info"],
106
- skeleton_topology["skeleton_info"],
107
- )
108
- vis_kpt = [s >= kpt_thr for s in scores]
109
- link_dict = {}
110
- for i, kpt_info in keypoint_info.items():
111
- kpt_color = tuple(kpt_info["color"])
112
- link_dict[kpt_info["name"]] = kpt_info["id"]
113
-
114
- kpt = keypoints[i]
115
-
116
- if vis_kpt[i]:
117
- img = cv2.circle(img, (int(kpt[0]), int(kpt[1])), int(radius), kpt_color, -1)
118
-
119
- for i, ske_info in skeleton_info.items():
120
- link = ske_info["link"]
121
- pt0, pt1 = link_dict[link[0]], link_dict[link[1]]
122
-
123
- if vis_kpt[pt0] and vis_kpt[pt1]:
124
- link_color = ske_info["color"]
125
- kpt0 = keypoints[pt0]
126
- kpt1 = keypoints[pt1]
127
-
128
- img = cv2.line(
129
- img, (int(kpt0[0]), int(kpt0[1])), (int(kpt1[0]), int(kpt1[1])), link_color, thickness=line_width
130
- )
131
-
132
- return img
133
-
134
- def plot_person_kpts(
135
- self,
136
- pose_vis_img: np.ndarray,
137
- keypoints: np.ndarray,
138
- scores: np.ndarray,
139
- kpt_thr: float = 0.6,
140
- openpose_format: bool = True,
141
- line_width: int = 4,
142
- ) -> np.ndarray:
143
- """
144
- Plot keypoints for each detected person,
145
- updating the pose visualization image in place.
146
- """
147
- for kpts, ss in zip(keypoints, scores):
148
- try:
149
- pose_vis_img = self.draw_skeleton(
150
- pose_vis_img, kpts, ss, kpt_thr=kpt_thr, openpose_format=openpose_format, line_width=line_width
151
- )
152
- except ValueError as e:
153
- log.error(f"Error in draw_skeleton func, {e}")
154
-
155
- return pose_vis_img
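
Usage sketch for the deleted `HumanKeypointModel` (untested; file names are placeholders). Note that `Wholebody` is constructed with `device="cuda"`, so a GPU-enabled onnxruntime build is assumed:

```python
# Sketch (untested): render an OpenPose-style skeleton video from an input clip.
from cosmos_transfer1.auxiliary.human_keypoint.human_keypoint import HumanKeypointModel

model = HumanKeypointModel(to_openpose=True, conf_thres=0.6)
model("input.mp4", output_video="keypoint.mp4")  # skeleton-on-black video, same fps and size as the input
```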
cosmos-transfer1/cosmos_transfer1/auxiliary/robot_augmentation/README.md DELETED
@@ -1,112 +0,0 @@
1
- # Robot Data Augmentation with Cosmos-Transfer1
2
-
3
- This pipeline provides a two-step process to augment robotic videos using **Cosmos-Transfer1-7B**. It leverages **spatial-temporal control** to modify backgrounds while preserving the shape and/or appearance of the robot foreground.
4
-
5
- ## Overview of Settings
6
-
7
- We propose two augmentation settings:
8
-
9
- ### Setting 1 (fg_vis_edge_bg_seg): Preserve Shape and Appearance of the Robot (foreground)
10
- - **Foreground Controls**: `Edge`, `Vis`
11
- - **Background Controls**: `Segmentation`
12
- - **Weights**:
13
- - `w_edge(FG) = 1`
14
- - `w_vis(FG) = 1`
15
- - `w_seg(BG) = 1`
16
- - All other weights = 0
17
-
18
- ### Setting 2 (fg_edge_bg_seg): Preserve Only Shape of the Robot (foreground)
19
- - **Foreground Controls**: `Edge`
20
- - **Background Controls**: `Segmentation`
21
- - **Weights**:
22
- - `w_edge(FG) = 1`
23
- - `w_seg(BG) = 1`
24
- - All other weights = 0
25
-
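
In code, these two settings map onto per-control foreground/background weights; this mirrors the `settings` dict in `WeightSettings.get_settings` from `spatial_temporal_weight.py`:

```python
# Per-control foreground/background weights, as encoded by WeightSettings in
# cosmos_transfer1/auxiliary/robot_augmentation/spatial_temporal_weight.py.
settings = {
    "fg_vis_edge_bg_seg": {  # setting 1: preserve robot shape and appearance
        "depth": {"foreground": 0.0, "background": 0.0},
        "vis": {"foreground": 1.0, "background": 0.0},
        "edge": {"foreground": 1.0, "background": 0.0},
        "seg": {"foreground": 0.0, "background": 1.0},
    },
    "fg_edge_bg_seg": {  # setting 2: preserve robot shape only
        "depth": {"foreground": 0.0, "background": 0.0},
        "vis": {"foreground": 0.0, "background": 0.0},
        "edge": {"foreground": 1.0, "background": 0.0},
        "seg": {"foreground": 0.0, "background": 1.0},
    },
}
```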
26
- ## Step-by-Step Instructions
27
-
28
- ### Step 1: Generate Spatial-Temporal Weights
29
-
30
- This script extracts foreground (robot) and background information from semantic segmentation data. It processes per-frame segmentation masks and color-to-class mappings to generate spatial-temporal weight matrices for each control modality based on the selected setting.
31
-
32
- #### Input Requirements:
33
- - A `segmentation` folder containing per-frame segmentation masks in PNG format
34
- - A `segmentation_label` folder containing color-to-class mapping JSON files for each frame, for example:
35
- ```json
36
- {
37
- "(29, 0, 0, 255)": {
38
- "class": "gripper0_right_r_palm_vis"
39
- },
40
- "(31, 0, 0, 255)": {
41
- "class": "gripper0_right_R_thumb_proximal_base_link_vis"
42
- },
43
- "(33, 0, 0, 255)": {
44
- "class": "gripper0_right_R_thumb_proximal_link_vis"
45
- }
46
- }
47
- ```
48
- - An input video file
49
-
50
- Here is an example input format:
51
- [Example input directory](https://github.com/google-deepmind/cosmos/tree/main/assets/robot_augmentation_example/example1)
52
-
53
- #### Usage
54
-
55
- ```bash
56
- PYTHONPATH=$(pwd) python cosmos_transfer1/auxiliary/robot_augmentation/spatial_temporal_weight.py \
57
- --setting fg_vis_edge_bg_seg \
58
- --robot-keywords world_robot gripper robot \
59
- --input-dir assets/robot_augmentation_example \
60
- --output-dir outputs/robot_augmentation_example
61
- ```
62
-
63
- #### Parameters:
64
-
65
- * `--setting`: Weight setting to use (choices: 'fg_vis_edge_bg_seg', 'fg_edge_bg_seg'; default: 'fg_vis_edge_bg_seg')
66
- * `fg_vis_edge_bg_seg` (setting 1): emphasizes the robot in visual and edge features (vis: 1.0 foreground, edge: 1.0 foreground, seg: 1.0 background)
67
- * `fg_edge_bg_seg` (setting 2): emphasizes the robot only in edge features (edge: 1.0 foreground, seg: 1.0 background)
68
-
69
- * `--input-dir`: Input directory containing example folders
70
- * Default: 'assets/robot_augmentation_example'
71
-
72
- * `--output-dir`: Output directory for weight matrices
73
- * Default: 'outputs/robot_augmentation_example'
74
-
75
- * `--robot-keywords`: Keywords used to identify robot classes
76
- * Default: ["world_robot", "gripper", "robot"]
77
- * Any semantic class containing these keywords will be treated as robot foreground
78
-
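
Step 1 writes one float16 weight tensor of shape `(num_frames, height, width)` per control modality (`depth_weights.pt`, `vis_weights.pt`, `edge_weights.pt`, `seg_weights.pt`) into each example's output folder. A quick sanity check (a sketch; the `example1` path is illustrative):

```python
# Sanity-check sketch for the Step 1 outputs.
import torch

w = torch.load("outputs/robot_augmentation_example/example1/seg_weights.pt")
print(w.shape, w.dtype)  # torch.Size([num_frames, height, width]) torch.float16
```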
79
- ### Step 2: Run Cosmos-Transfer1 Inference
80
-
81
- Use the generated spatial-temporal weight matrices to perform video augmentation with the proper controls.
82
-
83
- ```bash
84
- export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:=0}"
85
- export CHECKPOINT_DIR="${CHECKPOINT_DIR:=./checkpoints}"
86
- export NUM_GPU="${NUM_GPU:=1}"
87
-
88
- PYTHONPATH=$(pwd) torchrun --nproc_per_node=$NUM_GPU --nnodes=1 --node_rank=0 \
89
- cosmos_transfer1/diffusion/inference/transfer.py \
90
- --checkpoint_dir $CHECKPOINT_DIR \
91
- --video_save_folder outputs/robot_example_spatial_temporal_setting1 \
92
- --controlnet_specs assets/robot_augmentation_example/example1/inference_cosmos_transfer1_robot_spatiotemporal_weights.json \
93
- --offload_text_encoder_model \
94
- --offload_guardrail_models \
95
- --num_gpus $NUM_GPU
96
- ```
97
-
98
- - Augmented videos are saved in `outputs/robot_example_spatial_temporal_setting1/`
99
-
100
- ## Input and Output Examples
101
-
102
- Input video:
103
-
104
- <video src="https://github.com/user-attachments/assets/9c2df99d-7d0c-4dcf-af87-4ec9f65328ed">
105
- Your browser does not support the video tag.
106
- </video>
107
-
108
- You can run the pipeline multiple times with different prompts (e.g., `assets/robot_augmentation_example/example1/example1_prompts.json`) to get different augmentation results:
109
-
110
- <video src="https://github.com/user-attachments/assets/6dee15f5-9d8b-469a-a92a-3419cb466d44">
111
- Your browser does not support the video tag.
112
- </video>
cosmos-transfer1/cosmos_transfer1/auxiliary/robot_augmentation/spatial_temporal_weight.py DELETED
@@ -1,577 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- # This script processes segmentation results for each video frame saved as JSON files and generates a spatial-temporal weight matrix saved as a .pt file.
17
- # The input JSON files contain segmentation information for each frame, and the output .pt file represents the spatial-temporal weight matrix for the video.
18
-
19
- import argparse
20
- import glob
21
- import json
22
- import logging
23
- import os
24
- import re
25
- from collections import defaultdict
26
-
27
- import cv2
28
- import numpy as np
29
- import torch
30
- from tqdm import tqdm
31
-
32
- # Configure logging
33
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
34
- logger = logging.getLogger(__name__)
35
-
36
-
37
- # Class to manage different weight settings
38
- class WeightSettings:
39
- """Class to manage different weight settings for the features"""
40
-
41
- @staticmethod
42
- def get_settings(setting_name):
43
- """Get weight settings by name
44
-
45
- Args:
46
- setting_name (str): Name of the setting
47
-
48
- Returns:
49
- dict: Dictionary with weights for each feature
50
- """
51
- settings = {
52
- # Default setting (setting 1): emphasize the robot in vis and edge features; seg covers the background
53
- "fg_vis_edge_bg_seg": {
54
- "depth": {"foreground": 0.0, "background": 0.0},
55
- "vis": {"foreground": 1.0, "background": 0.0},
56
- "edge": {"foreground": 1.0, "background": 0.0},
57
- "seg": {"foreground": 0.0, "background": 1.0},
58
- },
59
- "fg_edge_bg_seg": {
60
- "depth": {"foreground": 0.0, "background": 0.0},
61
- "vis": {"foreground": 0.0, "background": 0.0},
62
- "edge": {"foreground": 1.0, "background": 0.0},
63
- "seg": {"foreground": 0.0, "background": 1.0},
64
- },
65
- }
66
-
67
- if setting_name not in settings:
68
- logger.warning(f"Setting '{setting_name}' not found. Using default.")
69
- return settings["fg_vis_edge_bg_seg"]
70
-
71
- return settings[setting_name]
72
-
73
- @staticmethod
74
- def list_settings():
75
- """List all available settings
76
-
77
- Returns:
78
- list: List of setting names
79
- """
80
- return ["fg_vis_edge_bg_seg", "fg_edge_bg_seg"]
81
-
82
-
83
- def get_video_info(video_path):
84
- """Get video dimensions and frame count"""
85
- cap = cv2.VideoCapture(video_path)
86
- if not cap.isOpened():
87
- raise ValueError(f"Could not open video file: {video_path}")
88
-
89
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
90
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
91
- frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
92
- fps = cap.get(cv2.CAP_PROP_FPS)
93
-
94
- cap.release()
95
- return width, height, frame_count, fps
96
-
97
-
98
- def parse_color_key(color_key):
99
- """Parse a color key string into an RGB tuple
100
-
101
- Args:
102
- color_key (str): Color key string in the format "(r,g,b,a)" or similar
103
-
104
- Returns:
105
- tuple: RGB tuple (r, g, b)
106
- """
107
- # Extract numbers using regex to handle different formats
108
- numbers = re.findall(r"\d+", color_key)
109
- if len(numbers) >= 3:
110
- r, g, b = map(int, numbers[:3])
111
- return (r, g, b)
112
- else:
113
- raise ValueError(f"Invalid color key format: {color_key}")
114
-
115
-
116
- def save_visualization(mask, frame_num, feature_name, viz_dir):
117
- """Save a visualization of the binary mask
118
-
119
- Args:
120
- mask (numpy.ndarray): The mask (values 0 or 255)
121
- frame_num (int): The frame number
122
- feature_name (str): The name of the feature (depth, vis, edge, seg)
123
- viz_dir (str): Directory to save visualizations
124
- """
125
- # Simply save the binary mask directly
126
- output_path = os.path.join(viz_dir, f"{feature_name}_frame_{frame_num:06d}.png")
127
- cv2.imwrite(output_path, mask)
128
- logger.info(f"Saved binary visualization to {output_path}")
129
-
130
-
131
- def process_segmentation_files(
132
- segmentation_dir,
133
- output_dir,
134
- viz_dir,
135
- video_path=None,
136
- weights_dict=None,
137
- setting_name="fg_vis_edge_bg_seg",
138
- robot_keywords=None,
139
- ):
140
- """Process all segmentation JSON files and create weight matrices
141
-
142
- Args:
143
- segmentation_dir (str): Directory containing segmentation JSON files
144
- output_dir (str): Directory to save weight matrices
145
- viz_dir (str): Directory to save visualizations
146
- video_path (str, optional): Path to the video file. Defaults to None.
147
- weights_dict (dict, optional): Dictionary with weights for each feature.
148
- Format: {
149
- 'depth': {'foreground': float, 'background': float},
150
- 'vis': {'foreground': float, 'background': float},
151
- 'edge': {'foreground': float, 'background': float},
152
- 'seg': {'foreground': float, 'background': float}
153
- }
154
- Values should be in range 0-1. Defaults to None.
155
- setting_name (str, optional): Weight setting name. Defaults to 'fg_vis_edge_bg_seg' (setting 1).
156
- robot_keywords (list, optional): List of keywords to identify robot classes. Defaults to ["robot"].
157
- """
158
-
159
- # Set default robot keywords if not provided
160
- if robot_keywords is None:
161
- robot_keywords = ["robot"]
162
-
163
- # Get all JSON files
164
- json_files = sorted(glob.glob(os.path.join(segmentation_dir, "*.json")))
165
- logger.info(f"Found {len(json_files)} JSON files")
166
-
167
- if len(json_files) == 0:
168
- raise ValueError(f"No JSON files found in {segmentation_dir}")
169
-
170
- # For example directories, check for PNG files
171
- png_dir = os.path.join(os.path.dirname(segmentation_dir), "segmentation")
172
- png_files = []
173
- if os.path.exists(png_dir):
174
- png_files = sorted(glob.glob(os.path.join(png_dir, "*.png")))
175
- logger.info(f"Found {len(png_files)} PNG files in segmentation directory")
176
-
177
- # Step 1: Create a unified color-to-class mapping from all JSON files
178
- logger.info("Creating unified color-to-class mapping...")
179
- rgb_to_class = {}
180
- rgb_to_is_robot = {}
181
-
182
- for json_file in tqdm(json_files, desc="Processing JSON files for unified mapping"):
183
- with open(json_file, "r") as f:
184
- json_data = json.load(f)
185
-
186
- for color_key, data in json_data.items():
187
- color = parse_color_key(color_key)
188
- class_name = data["class"]
189
-
190
- # Store RGB color for matching
191
- rgb_to_class[color] = class_name
192
- rgb_to_is_robot[color] = any(keyword in class_name for keyword in robot_keywords)
193
-
194
- # Print statistics about the unified color mapping
195
- robot_colors = [color for color, is_robot in rgb_to_is_robot.items() if is_robot]
196
- logger.info(f"Unified mapping: Found {len(robot_colors)} robot colors out of {len(rgb_to_is_robot)} total colors")
197
- if robot_colors:
198
- logger.info(f"Robot classes: {[rgb_to_class[color] for color in robot_colors]}")
199
-
200
- # Convert color mapping to arrays for vectorized operations
201
- colors = list(rgb_to_is_robot.keys())
202
- color_array = np.array(colors)
203
- is_robot_array = np.array([rgb_to_is_robot[color] for color in colors], dtype=bool)
204
-
205
- # If we have PNG files, get dimensions from the first PNG
206
- if png_files:
207
- # Get dimensions from the first PNG file
208
- first_png = cv2.imread(png_files[0])
209
- if first_png is None:
210
- raise ValueError(f"Could not read PNG file: {png_files[0]}")
211
-
212
- height, width = first_png.shape[:2]
213
- frame_count = len(png_files)
214
-
215
- # Match frame numbers between JSON and PNG files to ensure correct correspondence
216
- json_frame_nums = [int(os.path.basename(f).split("_")[-1].split(".")[0]) for f in json_files]
217
- png_frame_nums = [int(os.path.basename(f).split("_")[-1].split(".")[0]) for f in png_files]
218
-
219
- # Find common frames between JSON and PNG files
220
- common_frames = sorted(set(json_frame_nums).intersection(set(png_frame_nums)))
221
- logger.info(f"Found {len(common_frames)} common frames between JSON and PNG files")
222
-
223
- if len(common_frames) == 0:
224
- raise ValueError("No matching frames found between JSON and PNG files")
225
-
226
- # Create maps to easily look up files by frame number
227
- json_map = {int(os.path.basename(f).split("_")[-1].split(".")[0]): f for f in json_files}
228
- png_map = {int(os.path.basename(f).split("_")[-1].split(".")[0]): f for f in png_files}
229
-
230
- # Create new lists with only matching files
231
- json_files = [json_map[frame] for frame in common_frames if frame in json_map]
232
- png_files = [png_map[frame] for frame in common_frames if frame in png_map]
233
- num_frames = len(json_files)
234
-
235
- logger.info(f"Using PNG dimensions: {width}x{height}, processing {num_frames} frames")
236
- else:
237
- # Get video information if no PNG files available
238
- try:
239
- width, height, frame_count, fps = get_video_info(video_path)
240
- logger.info(f"Video dimensions: {width}x{height}, {frame_count} frames, {fps} fps")
241
- num_frames = min(len(json_files), frame_count)
242
- except Exception as e:
243
- logger.warning(f"Warning: Could not get video information: {e}")
244
- # Use a default size if we can't get the video info
245
- width, height = 640, 480
246
- num_frames = len(json_files)
247
- logger.info(f"Using default dimensions: {width}x{height}, {num_frames} frames")
248
-
249
- # Initialize weight tensors
250
- depth_weights = torch.zeros((num_frames, height, width))
251
- vis_weights = torch.zeros((num_frames, height, width))
252
- edge_weights = torch.zeros((num_frames, height, width))
253
- seg_weights = torch.zeros((num_frames, height, width))
254
-
255
- # Process frames
256
- if png_files:
257
- # Process PNG files directly
258
- for i, (json_file, png_file) in enumerate(zip(json_files, png_files)):
259
- # Get frame number from filename
260
- frame_num = int(os.path.basename(json_file).split("_")[-1].split(".")[0])
261
-
262
- # Read the corresponding PNG file
263
- frame = cv2.imread(png_file)
264
-
265
- if frame is None:
266
- logger.warning(f"Warning: Could not read frame {i} from PNG. Using blank frame.")
267
- frame = np.zeros((height, width, 3), dtype=np.uint8)
268
-
269
- # Convert frame to RGB
270
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
271
-
272
- # Calculate total pixels
273
- total_pixels = height * width
274
-
275
- # Vectorized approach for finding nearest colors
276
- # Convert frame_rgb to a 2D array of shape (height*width, 3)
277
- pixels = frame_rgb.reshape(-1, 3)
278
-
279
- # Calculate distances between each pixel and each color (vectorized)
280
- # This creates a matrix of shape (height*width, num_colors)
281
- distances = np.sqrt(np.sum((pixels[:, np.newaxis, :] - color_array[np.newaxis, :, :]) ** 2, axis=2))
282
-
283
- # Find the index of the nearest color for each pixel
284
- nearest_color_indices = np.argmin(distances, axis=1)
285
-
286
- # Get the is_robot value for each pixel based on its nearest color
287
- pixel_is_robot = is_robot_array[nearest_color_indices]
288
-
289
- # Reshape back to image dimensions
290
- pixel_is_robot_2d = pixel_is_robot.reshape(height, width)
291
-
292
- # Count robot and matched pixels
293
- robot_pixel_count = np.sum(pixel_is_robot)
294
- matched_pixel_count = pixels.shape[0] # All pixels are matched now
295
-
296
- # Create masks based on the is_robot classification
297
- depth_mask = np.where(
298
- pixel_is_robot_2d, weights_dict["depth"]["foreground"], weights_dict["depth"]["background"]
299
- )
300
-
301
- vis_mask = np.where(pixel_is_robot_2d, weights_dict["vis"]["foreground"], weights_dict["vis"]["background"])
302
-
303
- edge_mask = np.where(
304
- pixel_is_robot_2d, weights_dict["edge"]["foreground"], weights_dict["edge"]["background"]
305
- )
306
-
307
- seg_mask = np.where(pixel_is_robot_2d, weights_dict["seg"]["foreground"], weights_dict["seg"]["background"])
308
-
309
- # Create visualization mask
310
- visualization_mask = np.zeros((height, width), dtype=np.uint8)
311
- visualization_mask[pixel_is_robot_2d] = 255
312
-
313
- # Log statistics
314
- robot_percentage = (robot_pixel_count / total_pixels) * 100
315
- matched_percentage = (matched_pixel_count / total_pixels) * 100
316
- logger.info(f"Frame {frame_num}: {robot_pixel_count} robot pixels ({robot_percentage:.2f}%)")
317
- logger.info(f"Frame {frame_num}: {matched_pixel_count} matched pixels ({matched_percentage:.2f}%)")
318
-
319
- # Save visualizations for this frame
320
- save_visualization(visualization_mask, frame_num, "segmentation", viz_dir)
321
-
322
- # Store the masks in the weight tensors
323
- depth_weights[i] = torch.from_numpy(depth_mask)
324
- vis_weights[i] = torch.from_numpy(vis_mask)
325
- edge_weights[i] = torch.from_numpy(edge_mask)
326
- seg_weights[i] = torch.from_numpy(seg_mask)
327
- else:
328
- # Use video frames if available
329
- try:
330
- # Open the segmentation video
331
- cap = cv2.VideoCapture(video_path)
332
- if not cap.isOpened():
333
- raise ValueError(f"Could not open video file: {video_path}")
334
-
335
- # Process each frame using the unified color mapping
336
- for i, json_file in enumerate(tqdm(json_files[:num_frames], desc="Processing frames")):
337
- # Get frame number from filename
338
- frame_num = int(os.path.basename(json_file).split("_")[-1].split(".")[0])
339
-
340
- # Read the corresponding frame from the video
341
- cap.set(cv2.CAP_PROP_POS_FRAMES, i)
342
- ret, frame = cap.read()
343
-
344
- if not ret:
345
- logger.warning(f"Warning: Could not read frame {i} from video. Using blank frame.")
346
- frame = np.zeros((height, width, 3), dtype=np.uint8)
347
-
348
- # Convert frame to RGB
349
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
350
-
351
- # Calculate total pixels
352
- total_pixels = height * width
353
-
354
- # Vectorized approach for finding nearest colors
355
- pixels = frame_rgb.reshape(-1, 3)
356
- distances = np.sqrt(np.sum((pixels[:, np.newaxis, :] - color_array[np.newaxis, :, :]) ** 2, axis=2))
357
- nearest_color_indices = np.argmin(distances, axis=1)
358
- pixel_is_robot = is_robot_array[nearest_color_indices]
359
- pixel_is_robot_2d = pixel_is_robot.reshape(height, width)
360
-
361
- # Count robot and matched pixels
362
- robot_pixel_count = np.sum(pixel_is_robot)
363
- matched_pixel_count = pixels.shape[0]
364
-
365
- # Create masks based on the is_robot classification
366
- depth_mask = np.where(
367
- pixel_is_robot_2d, weights_dict["depth"]["foreground"], weights_dict["depth"]["background"]
368
- )
369
- vis_mask = np.where(
370
- pixel_is_robot_2d, weights_dict["vis"]["foreground"], weights_dict["vis"]["background"]
371
- )
372
- edge_mask = np.where(
373
- pixel_is_robot_2d, weights_dict["edge"]["foreground"], weights_dict["edge"]["background"]
374
- )
375
- seg_mask = np.where(
376
- pixel_is_robot_2d, weights_dict["seg"]["foreground"], weights_dict["seg"]["background"]
377
- )
378
-
379
- # Create visualization mask
380
- visualization_mask = np.zeros((height, width), dtype=np.uint8)
381
- visualization_mask[pixel_is_robot_2d] = 255
382
-
383
- # Log statistics
384
- robot_percentage = (robot_pixel_count / total_pixels) * 100
385
- matched_percentage = (matched_pixel_count / total_pixels) * 100
386
- logger.info(f"Frame {frame_num}: {robot_pixel_count} robot pixels ({robot_percentage:.2f}%)")
387
- logger.info(f"Frame {frame_num}: {matched_pixel_count} matched pixels ({matched_percentage:.2f}%)")
388
-
389
- # Save visualizations for this frame
390
- save_visualization(visualization_mask, frame_num, "segmentation", viz_dir)
391
-
392
- # Store the masks in the weight tensors
393
- depth_weights[i] = torch.from_numpy(depth_mask)
394
- vis_weights[i] = torch.from_numpy(vis_mask)
395
- edge_weights[i] = torch.from_numpy(edge_mask)
396
- seg_weights[i] = torch.from_numpy(seg_mask)
397
-
398
- # Close the video capture
399
- cap.release()
400
- except Exception as e:
401
- logger.warning(f"Warning: Error processing video: {e}")
402
- logger.warning("Cannot process this example without proper frame data.")
403
- raise ValueError(f"Cannot process example without frame data: {e}")
404
-
405
- # Save weight tensors
406
- # Convert weights to half precision (float16) to reduce file size
407
- depth_weights_half = depth_weights.to(torch.float16)
408
- vis_weights_half = vis_weights.to(torch.float16)
409
- edge_weights_half = edge_weights.to(torch.float16)
410
- seg_weights_half = seg_weights.to(torch.float16)
411
-
412
- # Save the half precision tensors
413
- torch.save(depth_weights_half, os.path.join(output_dir, "depth_weights.pt"))
414
- torch.save(vis_weights_half, os.path.join(output_dir, "vis_weights.pt"))
415
- torch.save(edge_weights_half, os.path.join(output_dir, "edge_weights.pt"))
416
- torch.save(seg_weights_half, os.path.join(output_dir, "seg_weights.pt"))
417
-
418
- logger.info(f"Saved weight matrices to {output_dir}")
419
- logger.info(f"Weight matrix shape: {depth_weights_half.shape}, dtype: {depth_weights_half.dtype}")
420
- logger.info(f"Saved visualizations to {viz_dir}")
421
-
422
- return output_dir, viz_dir
423
-
424
-
425
- def process_all_examples(input_dir, output_dir, setting_name="fg_vis_edge_bg_seg", robot_keywords=None):
426
- """Process all example directories in the provided input directory
427
-
428
- Args:
429
- input_dir (str): Input directory containing example folders
430
- output_dir (str): Output directory for weight matrices
431
- setting_name (str, optional): Weight setting name. Defaults to 'fg_vis_edge_bg_seg'.
432
- robot_keywords (list, optional): List of keywords to identify robot classes. Defaults to None.
433
- """
434
- # Find all example directories
435
- if not os.path.exists(input_dir):
436
- logger.error(f"Input directory not found: {input_dir}")
437
- return []
438
-
439
- # List example directories
440
- examples = [d for d in os.listdir(input_dir) if os.path.isdir(os.path.join(input_dir, d))]
441
- examples = sorted(examples)
442
-
443
- if not examples:
444
- logger.warning("No example directories found.")
445
- return []
446
-
447
- # Print found examples
448
- logger.info(f"Found {len(examples)} example directories:")
449
- for example in examples:
450
- logger.info(f" - {example}")
451
-
452
- # Store processing results
453
- results = []
454
-
455
- # Process each example
456
- for example in examples:
457
- try:
458
- logger.info(f"\nProcessing {example}...")
459
-
460
- # Process this example with custom directories
461
- out_dir, viz_dir = process_example_with_dirs(example, input_dir, output_dir, setting_name, robot_keywords)
462
- results.append((example, out_dir, viz_dir))
463
-
464
- logger.info(f"Results for {example} saved to:")
465
- logger.info(f" Weight matrices: {out_dir}")
466
- logger.info(f" Visualizations: {viz_dir}")
467
-
468
- except Exception as e:
469
- logger.error(f"Error processing {example}: {e}")
470
-
471
- logger.info("\nAll examples processed.")
472
- return results
473
-
474
-
475
- # Process a specific example with custom input and output directories
476
- def process_example_with_dirs(
477
- example_name, input_dir, output_dir, setting_name="fg_vis_edge_bg_seg", robot_keywords=None
478
- ):
479
- """Process a specific example with custom input and output directories
480
-
481
- Args:
482
- example_name (str): Name of the example directory
483
- input_dir (str): Path to input directory containing example folders
484
- output_dir (str): Path to output directory for weight matrices
485
- setting_name (str, optional): Weight setting name. Defaults to 'fg_vis_edge_bg_seg'.
486
- robot_keywords (list, optional): List of keywords to identify robot classes. Defaults to None.
487
- """
488
- # Create paths for this example
489
- example_dir = os.path.join(input_dir, example_name)
490
- segmentation_dir = os.path.join(example_dir, "segmentation_label")
491
- video_path = os.path.join(example_dir, "segmentation.mp4")
492
-
493
- # Create output directories
494
- example_output_dir = os.path.join(output_dir, example_name)
495
- viz_dir = os.path.join(example_output_dir, "visualizations")
496
-
497
- # Check if weight files already exist
498
- depth_weights_path = os.path.join(example_output_dir, "depth_weights.pt")
499
- if os.path.exists(depth_weights_path):
500
- logger.info(f"Weight files already exist for {example_name}, skipping processing")
501
- return example_output_dir, viz_dir
502
-
503
- # Create output directories if they don't exist
504
- os.makedirs(example_output_dir, exist_ok=True)
505
- os.makedirs(viz_dir, exist_ok=True)
506
-
507
- # Get weight settings
508
- weights_dict = WeightSettings.get_settings(setting_name)
509
-
510
- # Process this example directly with paths
511
- return process_segmentation_files(
512
- segmentation_dir=segmentation_dir,
513
- output_dir=example_output_dir,
514
- viz_dir=viz_dir,
515
- video_path=video_path,
516
- weights_dict=weights_dict,
517
- setting_name=setting_name,
518
- robot_keywords=robot_keywords,
519
- )
520
-
521
-
522
- if __name__ == "__main__":
523
- # Parse command-line arguments
524
- parser = argparse.ArgumentParser(
525
- description="Process segmentation files to generate spatial-temporal weight matrices"
526
- )
527
- parser.add_argument(
528
- "--setting",
529
- type=str,
530
- default="fg_vis_edge_bg_seg",
531
- choices=WeightSettings.list_settings(),
532
- help="Weight setting to use (default: fg_vis_edge_bg_seg (setting1), fg_edge_bg_seg (setting2))",
533
- )
534
- parser.add_argument(
535
- "--input-dir",
536
- type=str,
537
- default="assets/robot_augmentation_example",
538
- help="Input directory containing example folders",
539
- )
540
- parser.add_argument(
541
- "--output-dir",
542
- type=str,
543
- default="outputs/robot_augmentation_example",
544
- help="Output directory for weight matrices",
545
- )
546
- parser.add_argument(
547
- "--robot-keywords",
548
- type=str,
549
- nargs="+",
550
- default=["world_robot", "gripper", "robot"],
551
- help="Keywords used to identify robot classes (default: world_robot gripper robot)",
552
- )
553
- parser.add_argument(
554
- "--log-level",
555
- type=str,
556
- default="INFO",
557
- choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
558
- help="Set the logging level",
559
- )
560
- args = parser.parse_args()
561
-
562
- # Set logging level from command line argument
563
- logger.setLevel(getattr(logging, args.log_level))
564
-
565
- # Get directories from arguments
566
- input_dir = args.input_dir
567
- output_dir = args.output_dir
568
- setting_name = args.setting
569
- robot_keywords = args.robot_keywords
570
-
571
- logger.info(f"Using input directory: {input_dir}")
572
- logger.info(f"Using output directory: {output_dir}")
573
- logger.info(f"Using weight setting: {setting_name}")
574
- logger.info(f"Using robot keywords: {robot_keywords}")
575
-
576
- # Process all examples with the provided input and output directories
577
- process_all_examples(input_dir, output_dir, setting_name, robot_keywords)
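
The core of the script above is a vectorized nearest-color lookup that labels every pixel as robot or background before the per-control weights are applied. A standalone sketch of just that step, with a toy two-color palette (illustrative values, not the real class colors):

```python
# Standalone sketch of the vectorized nearest-color lookup used above.
import numpy as np

color_array = np.array([(29, 0, 0), (200, 200, 200)])  # toy palette: robot color, background color
is_robot_array = np.array([True, False])               # per-palette-entry robot flag

frame_rgb = np.random.randint(0, 256, (480, 640, 3))   # stand-in for a segmentation frame
pixels = frame_rgb.reshape(-1, 3)

# (H*W, num_colors) Euclidean distances, then nearest palette entry per pixel.
distances = np.sqrt(np.sum((pixels[:, None, :] - color_array[None, :, :]) ** 2, axis=2))
pixel_is_robot_2d = is_robot_array[np.argmin(distances, axis=1)].reshape(480, 640)

# Foreground/background weights then follow from the chosen setting, e.g. for seg:
seg_mask = np.where(pixel_is_robot_2d, 0.0, 1.0)
```

Note that materializing the full `(H*W, num_colors)` distance matrix is memory-hungry for large palettes, which is a practical limit of this approach.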
cosmos-transfer1/cosmos_transfer1/auxiliary/sam2/sam2_model.py DELETED
@@ -1,392 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import os
17
- import sys
18
-
19
- import numpy as np
20
- import pycocotools.mask as mask_util
21
- import torch
22
-
23
- from cosmos_transfer1.utils import log
24
-
25
- sys.path.append("cosmos_transfer1/auxiliary")
26
-
27
- import tempfile
28
-
29
- from PIL import Image
30
- from sam2.sam2_video_predictor import SAM2VideoPredictor
31
- from transformers import AutoModelForZeroShotObjectDetection, AutoProcessor
32
-
33
- from cosmos_transfer1.auxiliary.sam2.sam2_utils import (
34
- capture_fps,
35
- convert_masks_to_frames,
36
- generate_tensor_from_images,
37
- video_to_frames,
38
- write_video,
39
- )
40
- from cosmos_transfer1.checkpoints import GROUNDING_DINO_MODEL_CHECKPOINT, SAM2_MODEL_CHECKPOINT
41
-
42
-
43
- def rle_encode(mask: np.ndarray) -> dict:
44
- """
45
- Encode a boolean mask (of shape (T, H, W)) using the pycocotools RLE format,
46
- matching the format of eff_segmentation.RleMaskSAMv2 (from Yotta).
47
-
48
- The procedure is:
49
- 1. Convert the mask to a numpy array in Fortran order.
50
- 2. Reshape the array to (-1, 1) (i.e. flatten in Fortran order).
51
- 3. Call pycocotools.mask.encode on the reshaped array.
52
- 4. Return a dictionary with the encoded data and the original mask shape.
53
- """
54
- mask = np.array(mask, order="F")
55
- # Reshape the mask to (-1, 1) in Fortran order and encode it.
56
- encoded = mask_util.encode(np.array(mask.reshape(-1, 1), order="F"))
57
- return {"data": encoded, "mask_shape": mask.shape}
58
-
59
-
60
- class VideoSegmentationModel:
61
- def __init__(self, **kwargs):
62
- """Initialize the model and load all required components."""
63
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
64
-
65
- # Initialize SAM2 predictor
66
- self.sam2_predictor = SAM2VideoPredictor.from_pretrained(SAM2_MODEL_CHECKPOINT).to(self.device)
67
-
68
- # Initialize GroundingDINO for text-based detection
69
- self.grounding_model_name = kwargs.get("grounding_model", GROUNDING_DINO_MODEL_CHECKPOINT)
70
- self.processor = AutoProcessor.from_pretrained(self.grounding_model_name)
71
- self.grounding_model = AutoModelForZeroShotObjectDetection.from_pretrained(self.grounding_model_name).to(
72
- self.device
73
- )
74
-
75
- def get_boxes_from_text(self, image_path, text_prompt):
76
- """Get bounding boxes (and labels) from a text prompt using GroundingDINO."""
77
- image = Image.open(image_path).convert("RGB")
78
-
79
- inputs = self.processor(images=image, text=text_prompt, return_tensors="pt").to(self.device)
80
-
81
- with torch.no_grad():
82
- outputs = self.grounding_model(**inputs)
83
-
84
- # Try with initial thresholds.
85
- results = self.processor.post_process_grounded_object_detection(
86
- outputs,
87
- inputs.input_ids,
88
- box_threshold=0.15,
89
- text_threshold=0.25,
90
- target_sizes=[image.size[::-1]],
91
- )
92
-
93
- boxes = results[0]["boxes"].cpu().numpy()
94
- scores = results[0]["scores"].cpu().numpy()
95
- labels = results[0].get("labels", None)
96
- if len(boxes) == 0:
97
- print(f"No boxes detected for prompt: '{text_prompt}'. Trying with lower thresholds...")
98
- results = self.processor.post_process_grounded_object_detection(
99
- outputs,
100
- inputs.input_ids,
101
- box_threshold=0.1,
102
- text_threshold=0.1,
103
- target_sizes=[image.size[::-1]],
104
- )
105
- boxes = results[0]["boxes"].cpu().numpy()
106
- scores = results[0]["scores"].cpu().numpy()
107
- labels = results[0].get("labels", None)
108
-
109
- if len(boxes) > 0:
110
- print(f"Found {len(boxes)} boxes with scores: {scores}")
111
- # Sort boxes by confidence score in descending order
112
- sorted_indices = np.argsort(scores)[::-1]
113
- boxes = boxes[sorted_indices]
114
- scores = scores[sorted_indices]
115
- if labels is not None:
116
- labels = np.array(labels)[sorted_indices]
117
- else:
118
- print("Still no boxes detected. Consider adjusting the prompt or using box/points mode.")
119
-
120
- return {"boxes": boxes, "labels": labels, "scores": scores}
121
-
122
- def visualize_frame(self, frame_idx, obj_ids, masks, video_dir, frame_names, visualization_data, save_dir=None):
123
- """
124
- Process a single frame: load the image, apply the segmentation mask to black out the
125
- detected object(s), and save both the masked frame and the binary mask image.
126
- """
127
- # Load the frame.
128
- frame_path = os.path.join(video_dir, frame_names[frame_idx])
129
- img = Image.open(frame_path).convert("RGB")
130
- image_np = np.array(img)
131
-
132
- # Combine masks from the detection output.
133
- if isinstance(masks, torch.Tensor):
134
- mask_np = (masks[0] > 0.0).cpu().numpy().astype(bool)
135
- combined_mask = mask_np
136
- elif isinstance(masks, dict):
137
- first_mask = next(iter(masks.values()))
138
- combined_mask = np.zeros_like(first_mask, dtype=bool)
139
- for m in masks.values():
140
- combined_mask |= m
141
- else:
142
- combined_mask = None
143
-
144
- if combined_mask is not None:
145
- combined_mask = np.squeeze(combined_mask)
146
-
147
- # If the mask shape doesn't match the image, resize it.
148
- if combined_mask.shape != image_np.shape[:2]:
149
- mask_img = Image.fromarray((combined_mask.astype(np.uint8)) * 255)
150
- mask_img = mask_img.resize((image_np.shape[1], image_np.shape[0]), resample=Image.NEAREST)
151
- combined_mask = np.array(mask_img) > 127
152
-
153
- # Black out the detected region.
154
- image_np[combined_mask] = 0
155
-
156
- mask_image = (combined_mask.astype(np.uint8)) * 255
157
- mask_pil = Image.fromarray(mask_image)
158
-
159
- if save_dir:
160
- seg_frame_path = os.path.join(save_dir, f"frame_{frame_idx}_segmented.png")
161
- seg_pil = Image.fromarray(image_np)
162
- seg_pil.save(seg_frame_path)
163
- if combined_mask is not None:
164
- mask_save_path = os.path.join(save_dir, f"frame_{frame_idx}_mask.png")
165
- mask_pil.save(mask_save_path)
166
-
167
- def sample(self, **kwargs):
168
- """
169
- Main sampling function for video segmentation.
170
- Returns a list of detections in which each detection contains a phrase and
171
- an RLE-encoded segmentation mask (matching the output of the Grounded SAM model).
172
- """
173
- video_dir = kwargs.get("video_dir", "")
174
- mode = kwargs.get("mode", "points")
175
- input_data = kwargs.get("input_data", None)
176
- save_dir = kwargs.get("save_dir", None)
177
- visualize = kwargs.get("visualize", False)
178
-
179
- # Get frame names (expecting frames named as numbers with .jpg/.jpeg extension).
180
- frame_names = [p for p in os.listdir(video_dir) if os.path.splitext(p)[-1].lower() in [".jpg", ".jpeg"]]
181
- frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))
182
-
183
- with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
184
- state = self.sam2_predictor.init_state(video_path=video_dir)
185
-
186
- ann_frame_idx = 0
187
- ann_obj_id = 1
188
- boxes = None
189
- points = None
190
- labels = None
191
- box = None
192
-
193
- visualization_data = {"mode": mode, "points": None, "labels": None, "box": None, "boxes": None}
194
-
195
- if input_data is not None:
196
- if mode == "points":
197
- points = input_data.get("points")
198
- labels = input_data.get("labels")
199
- frame_idx, obj_ids, masks = self.sam2_predictor.add_new_points_or_box(
200
- inference_state=state, frame_idx=ann_frame_idx, obj_id=ann_obj_id, points=points, labels=labels
201
- )
202
- visualization_data["points"] = points
203
- visualization_data["labels"] = labels
204
- elif mode == "box":
205
- box = input_data.get("box")
206
- frame_idx, obj_ids, masks = self.sam2_predictor.add_new_points_or_box(
207
- inference_state=state, frame_idx=ann_frame_idx, obj_id=ann_obj_id, box=box
208
- )
209
- visualization_data["box"] = box
210
- elif mode == "prompt":
211
- text = input_data.get("text")
212
- first_frame_path = os.path.join(video_dir, frame_names[0])
213
- gd_results = self.get_boxes_from_text(first_frame_path, text)
214
- boxes = gd_results["boxes"]
215
- labels_out = gd_results["labels"]
216
- scores = gd_results["scores"]
217
- log.info(f"scores: {scores}")
218
- if len(boxes) > 0:
219
- legacy_mask = kwargs.get("legacy_mask", False)
220
- if legacy_mask:
221
- # Use only the highest confidence box for legacy mask
222
- log.info(f"using legacy_mask: {legacy_mask}")
223
- frame_idx, obj_ids, masks = self.sam2_predictor.add_new_points_or_box(
224
- inference_state=state, frame_idx=ann_frame_idx, obj_id=ann_obj_id, box=boxes[0]
225
- )
226
- # Update boxes and labels after processing
227
- boxes = boxes[:1]
228
- if labels_out is not None:
229
- labels_out = labels_out[:1]
230
- else:
231
- log.info(f"using new_mask: {legacy_mask}")
232
- for object_id, (box, label) in enumerate(zip(boxes, labels_out)):
233
- frame_idx, obj_ids, masks = self.sam2_predictor.add_new_points_or_box(
234
- inference_state=state, frame_idx=ann_frame_idx, obj_id=object_id, box=box
235
- )
236
- visualization_data["boxes"] = boxes
237
- self.grounding_labels = [str(lbl) for lbl in labels_out] if labels_out is not None else [text]
238
- else:
239
- print("No boxes detected. Exiting.")
240
- return [] # Return empty list if no detections
241
-
242
- if visualize:
243
- self.visualize_frame(
244
- frame_idx=ann_frame_idx,
245
- obj_ids=obj_ids,
246
- masks=masks,
247
- video_dir=video_dir,
248
- frame_names=frame_names,
249
- visualization_data=visualization_data,
250
- save_dir=save_dir,
251
- )
252
-
253
- video_segments = {} # keys: frame index, values: {obj_id: mask}
254
- for out_frame_idx, out_obj_ids, out_mask_logits in self.sam2_predictor.propagate_in_video(state):
255
- video_segments[out_frame_idx] = {
256
- out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy() for i, out_obj_id in enumerate(out_obj_ids)
257
- }
258
-
259
- # For propagated frames, visualization_data is not used.
260
- if visualize:
261
- propagate_visualization_data = {
262
- "mode": mode,
263
- "points": None,
264
- "labels": None,
265
- "box": None,
266
- "boxes": None,
267
- }
268
- self.visualize_frame(
269
- frame_idx=out_frame_idx,
270
- obj_ids=out_obj_ids,
271
- masks=video_segments[out_frame_idx],
272
- video_dir=video_dir,
273
- frame_names=frame_names,
274
- visualization_data=propagate_visualization_data,
275
- save_dir=save_dir,
276
- )
277
-
278
- # --- Post-process video_segments to produce a list of detections ---
279
- if len(video_segments) == 0:
280
- return []
281
-
282
- first_frame_path = os.path.join(video_dir, frame_names[0])
283
- first_frame = np.array(Image.open(first_frame_path).convert("RGB"))
284
- original_shape = first_frame.shape[:2] # (height, width)
285
-
286
- object_masks = {} # key: obj_id, value: list of 2D boolean masks
287
- sorted_frame_indices = sorted(video_segments.keys())
288
- for frame_idx in sorted_frame_indices:
289
- segments = video_segments[frame_idx]
290
- for obj_id, mask in segments.items():
291
- mask = np.squeeze(mask)
292
- if mask.ndim != 2:
293
- print(f"Warning: Unexpected mask shape {mask.shape} for object {obj_id} in frame {frame_idx}.")
294
- continue
295
-
296
- if mask.shape != original_shape:
297
- mask_img = Image.fromarray(mask.astype(np.uint8) * 255)
298
- mask_img = mask_img.resize((original_shape[1], original_shape[0]), resample=Image.NEAREST)
299
- mask = np.array(mask_img) > 127
300
-
301
- if obj_id not in object_masks:
302
- object_masks[obj_id] = []
303
- object_masks[obj_id].append(mask)
304
-
305
- detections = []
306
- for obj_id, mask_list in object_masks.items():
307
- mask_stack = np.stack(mask_list, axis=0) # shape: (T, H, W)
308
- # Use our new rle_encode (which now follows the eff_segmentation.RleMaskSAMv2 format)
309
- rle = rle_encode(mask_stack)
310
- if mode == "prompt" and hasattr(self, "grounding_labels"):
311
- phrase = self.grounding_labels[0]
312
- else:
313
- phrase = input_data.get("text", "")
314
- detection = {"phrase": phrase, "segmentation_mask_rle": rle}
315
- detections.append(detection)
316
-
317
- return detections
318
-
319
- @staticmethod
320
- def parse_points(points_str):
321
- """Parse a string of points into a numpy array.
322
- Supports a single point ('200,300') or multiple points separated by ';' (e.g., '200,300;100,150').
323
- """
324
- points = []
325
- for point in points_str.split(";"):
326
- coords = point.split(",")
327
- if len(coords) != 2:
328
- continue
329
- points.append([float(coords[0]), float(coords[1])])
330
- return np.array(points, dtype=np.float32)
331
-
332
- @staticmethod
333
- def parse_labels(labels_str):
334
- """Parse a comma-separated string of labels into a numpy array."""
335
- return np.array([int(x) for x in labels_str.split(",")], dtype=np.int32)
336
-
337
- @staticmethod
338
- def parse_box(box_str):
339
- """Parse a comma-separated string of 4 box coordinates into a numpy array."""
340
- return np.array([float(x) for x in box_str.split(",")], dtype=np.float32)
341
-
342
- def __call__(
343
- self,
344
- input_video,
345
- output_video=None,
346
- output_tensor=None,
347
- prompt=None,
348
- box=None,
349
- points=None,
350
- labels=None,
351
- weight_scaler=None,
352
- binarize_video=False,
353
- legacy_mask=False,
354
- ):
355
- log.info(
356
- f"Processing video: {input_video} to generate segmentation video: {output_video} segmentation tensor: {output_tensor}"
357
- )
358
- assert os.path.exists(input_video)
359
-
360
- # Prepare input data based on the selected mode.
361
- if points is not None:
362
- mode = "points"
363
- input_data = {"points": self.parse_points(points), "labels": self.parse_labels(labels)}
364
- elif box is not None:
365
- mode = "box"
366
- input_data = {"box": self.parse_box(box)}
367
- elif prompt is not None:
368
- mode = "prompt"
369
- input_data = {"text": prompt}
370
-
371
- with tempfile.TemporaryDirectory() as temp_input_dir:
372
- fps = capture_fps(input_video)
373
- video_to_frames(input_video, temp_input_dir)
374
- with tempfile.TemporaryDirectory() as temp_output_dir:
375
- masks = self.sample(
376
- video_dir=temp_input_dir,
377
- mode=mode,
378
- input_data=input_data,
379
- save_dir=str(temp_output_dir),
380
- visualize=True,
381
- legacy_mask=legacy_mask,
382
- )
383
- if output_video:
384
- os.makedirs(os.path.dirname(output_video), exist_ok=True)
385
- frames = convert_masks_to_frames(masks)
386
- if binarize_video:
387
- frames = np.any(frames > 0, axis=-1).astype(np.uint8) * 255
388
- write_video(frames, output_video, fps)
389
- if output_tensor:
390
- generate_tensor_from_images(
391
- temp_output_dir, output_tensor, fps, "mask", weight_scaler=weight_scaler
392
- )
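
Usage sketch for the deleted `VideoSegmentationModel` (untested; paths and the prompt are placeholders), mirroring how `sam2_pipeline.py` below drives it:

```python
# Sketch (untested): text-prompted video segmentation via the __call__ interface above.
from cosmos_transfer1.auxiliary.sam2.sam2_model import VideoSegmentationModel

model = VideoSegmentationModel()
model(
    input_video="input.mp4",
    output_video="outputs/segmentation.mp4",  # rendered mask video
    output_tensor="outputs/segmentation.pt",  # mask tensor written via generate_tensor_from_images
    prompt="robot arm",
)
```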
cosmos-transfer1/cosmos_transfer1/auxiliary/sam2/sam2_pipeline.py DELETED
@@ -1,126 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import argparse
17
- import tempfile
18
-
19
- import numpy as np
20
-
21
- from cosmos_transfer1.auxiliary.sam2.sam2_model import VideoSegmentationModel
22
- from cosmos_transfer1.auxiliary.sam2.sam2_utils import (
23
- capture_fps,
24
- generate_tensor_from_images,
25
- generate_video_from_images,
26
- video_to_frames,
27
- )
28
-
29
-
30
- def parse_args():
31
- parser = argparse.ArgumentParser(description="Video Segmentation using SAM2")
32
- parser.add_argument("--input_video", type=str, required=True, help="Path to input video file")
33
- parser.add_argument(
34
- "--output_video", type=str, default="./outputs/output_video.mp4", help="Path to save the output video"
35
- )
36
- parser.add_argument(
37
- "--output_tensor", type=str, default="./outputs/output_tensor.pt", help="Path to save the output tensor"
38
- )
39
- parser.add_argument(
40
- "--mode", type=str, choices=["points", "box", "prompt"], default="points", help="Segmentation mode"
41
- )
42
- parser.add_argument("--prompt", type=str, help="Text prompt for prompt mode")
43
- parser.add_argument(
44
- "--grounding_model_path",
45
- type=str,
46
- default="IDEA-Research/grounding-dino-tiny",
47
- help="Local directory for GroundingDINO model files",
48
- )
49
- parser.add_argument(
50
- "--points",
51
- type=str,
52
- default="200,300",
53
- help="Comma-separated point coordinates for points mode (e.g., '200,300' or for multiple points use ';' as a separator, e.g., '200,300;100,150').",
54
- )
55
- parser.add_argument(
56
- "--labels",
57
- type=str,
58
- default="1",
59
- help="Comma-separated labels for points mode (e.g., '1' or '1,0' for multiple points).",
60
- )
61
- parser.add_argument(
62
- "--box",
63
- type=str,
64
- default="300,0,500,400",
65
- help="Comma-separated box coordinates for box mode (e.g., '300,0,500,400').",
66
- )
67
- # New flag to control visualization.
68
- parser.add_argument("--visualize", action="store_true", help="If set, visualize segmentation frames (save images)")
69
- return parser.parse_args()
70
-
71
-
72
- def parse_points(points_str):
73
- """Parse a string of points into a numpy array.
74
- Supports a single point ('200,300') or multiple points separated by ';' (e.g., '200,300;100,150').
75
- """
76
- points = []
77
- for point in points_str.split(";"):
78
- coords = point.split(",")
79
- if len(coords) != 2:
80
- continue
81
- points.append([float(coords[0]), float(coords[1])])
82
- return np.array(points, dtype=np.float32)
83
-
84
-
85
- def parse_labels(labels_str):
86
- """Parse a comma-separated string of labels into a numpy array."""
87
- return np.array([int(x) for x in labels_str.split(",")], dtype=np.int32)
88
-
89
-
90
- def parse_box(box_str):
91
- """Parse a comma-separated string of 4 box coordinates into a numpy array."""
92
- return np.array([float(x) for x in box_str.split(",")], dtype=np.float32)
93
-
94
-
95
- def main():
96
- args = parse_args()
97
-
98
- # Initialize the segmentation model.
99
- model = VideoSegmentationModel(**vars(args))
100
-
101
- # Prepare input data based on the selected mode.
102
- if args.mode == "points":
103
- input_data = {"points": parse_points(args.points), "labels": parse_labels(args.labels)}
104
- elif args.mode == "box":
105
- input_data = {"box": parse_box(args.box)}
106
- elif args.mode == "prompt":
107
- input_data = {"text": args.prompt}
108
-
109
- with tempfile.TemporaryDirectory() as temp_input_dir:
110
- fps = capture_fps(args.input_video)
111
- video_to_frames(args.input_video, temp_input_dir)
112
- with tempfile.TemporaryDirectory() as temp_output_dir:
113
- model.sample(
114
- video_dir=temp_input_dir,
115
- mode=args.mode,
116
- input_data=input_data,
117
- save_dir=str(temp_output_dir),
118
- visualize=True,
119
- )
120
- generate_video_from_images(temp_output_dir, args.output_video, fps)
121
- generate_tensor_from_images(temp_output_dir, args.output_tensor, fps, "mask")
122
-
123
-
124
- if __name__ == "__main__":
125
- print("Starting video segmentation...")
126
- main()
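
For reference, a minimal, self-contained sketch of the coordinate formats this CLI accepted (assuming only numpy; it mirrors the parse_points/parse_labels rules above, with labels following the usual SAM convention of 1 = foreground, 0 = background):

    import numpy as np

    def parse_points(points_str):
        # ';' separates points, ',' separates the x,y coordinates of one point.
        points = []
        for point in points_str.split(";"):
            coords = point.split(",")
            if len(coords) == 2:
                points.append([float(coords[0]), float(coords[1])])
        return np.array(points, dtype=np.float32)

    print(parse_points("200,300;100,150"))             # two prompt points, shape (2, 2)
    print(np.array("1,0".split(","), dtype=np.int32))   # labels for those points: [1 0]
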
cosmos-transfer1/cosmos_transfer1/auxiliary/sam2/sam2_utils.py DELETED
@@ -1,168 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import os
- import tempfile
- import time
-
- import cv2
- import imageio
- import numpy as np
- import pycocotools.mask
- import torch
- from natsort import natsorted
- from PIL import Image
- from torchvision import transforms
-
- from cosmos_transfer1.diffusion.datasets.augmentors.control_input import (
-     decode_partial_rle_width1,
-     segmentation_color_mask,
- )
- from cosmos_transfer1.utils import log
-
-
- def write_video(frames, output_path, fps=30):
-     """
-     Expects a sequence of [H, W, 3] or [H, W] frames.
-     """
-     with imageio.get_writer(output_path, fps=fps, macro_block_size=8) as writer:
-         for frame in frames:
-             if len(frame.shape) == 2:  # single channel
-                 frame = frame[:, :, None].repeat(3, axis=2)
-             writer.append_data(frame)
-
-
- def capture_fps(input_video_path: str):
-     cap = cv2.VideoCapture(input_video_path)
-     fps = cap.get(cv2.CAP_PROP_FPS)
-     return fps
-
-
- def video_to_frames(input_loc, output_loc):
-     """Extracts frames from an input video file and saves them as separate images in an output directory.
-     Args:
-         input_loc: Input video file.
-         output_loc: Output directory to save the frames.
-     Returns:
-         None
-     """
-     os.makedirs(output_loc, exist_ok=True)
-     # Log the time
-     time_start = time.time()
-     # Start capturing the feed
-     cap = cv2.VideoCapture(input_loc)
-     # Find the number of frames
-     video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-     print(f"Number of frames: {video_length}")
-     count = 0
-     print("Converting video...\n")
-     # Start converting the video
-     while cap.isOpened():
-         # Extract the frame
-         ret, frame = cap.read()
-         if not ret:
-             # Stop instead of spinning forever when the stream ends early or a read fails.
-             break
-         # Write the results back to the output location.
-         cv2.imwrite(output_loc + "/%#05d.jpg" % (count + 1), frame)
-         count = count + 1
-         # If there are no more frames left
-         if count > (video_length - 1):
-             # Log the time again
-             time_end = time.time()
-             # Release the feed
-             cap.release()
-             # Print stats
-             print("Done extracting frames.\n%d frames extracted" % count)
-             print("It took %d seconds for conversion." % (time_end - time_start))
-             break
-
-
- def convert_masks_to_frames(masks: list, num_masks_max: int = 100):
-     T, H, W = shape = masks[0]["segmentation_mask_rle"]["mask_shape"]
-     frame_start, frame_end = 0, T
-     num_masks = min(num_masks_max, len(masks))
-     mask_ids_select = np.arange(num_masks).tolist()
-
-     all_masks = np.zeros((num_masks, T, H, W), dtype=np.uint8)
-     for idx, mid in enumerate(mask_ids_select):
-         mask = masks[mid]
-         num_byte_per_mb = 1024 * 1024
-         # Decode partially when the full uint8 mask volume would exceed 256 MB.
-         if shape[0] * shape[1] * shape[2] / num_byte_per_mb > 256:
-             rle = decode_partial_rle_width1(
-                 mask["segmentation_mask_rle"]["data"],
-                 frame_start * shape[1] * shape[2],
-                 frame_end * shape[1] * shape[2],
-             )
-             partial_shape = (frame_end - frame_start, shape[1], shape[2])
-             rle = rle.reshape(partial_shape) * 255
-         else:
-             rle = pycocotools.mask.decode(mask["segmentation_mask_rle"]["data"])
-             rle = rle.reshape(shape) * 255
-         # Select the frames that are in the video
-         frame_indices = np.arange(frame_start, frame_end).tolist()
-         rle = np.stack([rle[i] for i in frame_indices])
-         all_masks[idx] = rle
-         del rle
-
-     all_masks = segmentation_color_mask(all_masks)  # NTHW -> 3THW
-     all_masks = all_masks.transpose(1, 2, 3, 0)
-     return all_masks
-
-
- def generate_video_from_images(masks: list, output_file_path: str, fps, num_masks_max: int = 100):
-     all_masks = convert_masks_to_frames(masks, num_masks_max)
-     write_video(all_masks, output_file_path, fps)
-     print("Video generated successfully!")
-
-
- def generate_tensor_from_images(
-     image_path_str: str, output_file_path: str, fps, search_pattern: str = None, weight_scaler: float = None
- ):
-     image_path = os.path.abspath(image_path_str)
-     if search_pattern is None:
-         images = list(natsorted(os.listdir(image_path)))
-     else:
-         images = [img for img in natsorted(os.listdir(image_path)) if search_pattern in img]
-
-     transform = transforms.ToTensor()
-     image_tensors = list()
-     for image in images:
-         img_tensor = transform(Image.open(os.path.join(image_path, image)))
-         image_tensors.append(img_tensor.squeeze(0))
-
-     tensor = torch.stack(image_tensors)  # [T, H, W], binary values, float
-
-     if weight_scaler is not None:
-         log.info(f"scaling the tensor by the specified scale: {weight_scaler}")
-         tensor = tensor * weight_scaler
-
-     log.info(f"saving tensor shape: {tensor.shape} to {output_file_path}")
-     torch.save(tensor, output_file_path)
-
-
- if __name__ == "__main__":
-     input_loc = "cosmos_transfer1/models/sam2/assets/input_video.mp4"
-     # mkdtemp() keeps the directory around; TemporaryDirectory().name would be
-     # deleted as soon as the object is garbage collected.
-     output_loc = tempfile.mkdtemp()
-     print(f"output_loc --- {output_loc}")
-     video_to_frames(input_loc, output_loc)
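
As a small self-contained illustration of the frame handling in write_video above (the output path is an arbitrary example): single-channel [H, W] mask frames are promoted to three channels before being handed to the encoder.

    import imageio
    import numpy as np

    # An 8-frame synthetic clip of [H, W] grayscale masks.
    frames = [np.full((64, 64), 255 * t // 7, dtype=np.uint8) for t in range(8)]
    with imageio.get_writer("/tmp/mask_preview.mp4", fps=4, macro_block_size=8) as writer:
        for frame in frames:
            writer.append_data(frame[:, :, None].repeat(3, axis=2))  # [H, W] -> [H, W, 3]
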
cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/__init__.py DELETED
@@ -1,14 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/image_cli.py DELETED
@@ -1,188 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- """A CLI to run ImageTokenizer on plain images based on torch.jit.
-
- Usage:
-     python3 -m cosmos_transfer1.auxiliary.tokenizer.inference.image_cli \
-         --image_pattern 'path/to/input/folder/*.jpg' \
-         --output_dir ./reconstructions \
-         --checkpoint_enc ./checkpoints/<model-name>/encoder.jit \
-         --checkpoint_dec ./checkpoints/<model-name>/decoder.jit
-
-     Optionally, you can run the model in pure PyTorch mode:
-     python3 -m cosmos_transfer1.auxiliary.tokenizer.inference.image_cli \
-         --image_pattern 'path/to/input/folder/*.jpg' \
-         --mode torch \
-         --tokenizer_type CI \
-         --spatial_compression 8 \
-         --checkpoint_enc ./checkpoints/<model-name>/encoder.jit \
-         --checkpoint_dec ./checkpoints/<model-name>/decoder.jit
- """
-
- import os
- import sys
- from argparse import ArgumentParser, Namespace
-
- import numpy as np
- from loguru import logger as logging
-
- from cosmos_transfer1.auxiliary.tokenizer.inference.image_lib import ImageTokenizer
- from cosmos_transfer1.auxiliary.tokenizer.inference.utils import (
-     get_filepaths,
-     get_output_filepath,
-     read_image,
-     resize_image,
-     write_image,
- )
- from cosmos_transfer1.auxiliary.tokenizer.networks import TokenizerConfigs
-
-
- def _parse_args() -> Namespace:
-     parser = ArgumentParser(description="A CLI for running ImageTokenizer on plain images.")
-     parser.add_argument(
-         "--image_pattern",
-         type=str,
-         default="path/to/images/*.jpg",
-         help="Glob pattern.",
-     )
-     parser.add_argument(
-         "--checkpoint",
-         type=str,
-         default=None,
-         help="JIT full Autoencoder model filepath.",
-     )
-     parser.add_argument(
-         "--checkpoint_enc",
-         type=str,
-         default=None,
-         help="JIT Encoder model filepath.",
-     )
-     parser.add_argument(
-         "--checkpoint_dec",
-         type=str,
-         default=None,
-         help="JIT Decoder model filepath.",
-     )
-     parser.add_argument(
-         "--tokenizer_type",
-         type=str,
-         choices=["CI", "DI"],
-         help="Specifies the tokenizer type.",
-     )
-     parser.add_argument(
-         "--spatial_compression",
-         type=int,
-         choices=[8, 16],
-         default=8,
-         help="The spatial compression factor.",
-     )
-     parser.add_argument(
-         "--mode",
-         type=str,
-         choices=["torch", "jit"],
-         default="jit",
-         help="Specify the backend: native 'torch' or 'jit' (default: 'jit')",
-     )
-     parser.add_argument(
-         "--short_size",
-         type=int,
-         default=None,
-         help="The size to resample inputs. None, by default.",
-     )
-     parser.add_argument(
-         "--dtype",
-         type=str,
-         default="bfloat16",
-         help="Sets the precision. Default bfloat16.",
-     )
-     parser.add_argument(
-         "--device",
-         type=str,
-         default="cuda",
-         help="Device for invoking the model.",
-     )
-     parser.add_argument("--output_dir", type=str, default=None, help="Output directory.")
-     parser.add_argument(
-         "--save_input",
-         action="store_true",
-         help="If on, the input image will be output too.",
-     )
-     args = parser.parse_args()
-     return args
-
-
- logging.info("Initializing args ...")
- args = _parse_args()
- if args.mode == "torch" and args.tokenizer_type not in ["CI", "DI"]:
-     logging.error("'torch' backend requires the tokenizer_type of 'CI' or 'DI'.")
-     sys.exit(1)
-
-
- def _run_eval() -> None:
-     """Invokes the evaluation pipeline."""
-
-     if args.checkpoint_enc is None and args.checkpoint_dec is None and args.checkpoint is None:
-         logging.warning("Aborting. Provide either both encoder and decoder JIT models, or the full autoencoder JIT model.")
-         return
-
-     if args.mode == "torch":
-         tokenizer_config = TokenizerConfigs[args.tokenizer_type].value
-         tokenizer_config.update(dict(spatial_compression=args.spatial_compression))
-     else:
-         tokenizer_config = None
-
-     logging.info(
-         f"Loading a torch.jit model `{os.path.dirname(args.checkpoint or args.checkpoint_enc or args.checkpoint_dec)}` ..."
-     )
-     autoencoder = ImageTokenizer(
-         checkpoint=args.checkpoint,
-         checkpoint_enc=args.checkpoint_enc,
-         checkpoint_dec=args.checkpoint_dec,
-         tokenizer_config=tokenizer_config,
-         device=args.device,
-         dtype=args.dtype,
-     )
-
-     filepaths = get_filepaths(args.image_pattern)
-     logging.info(f"Found {len(filepaths)} images from {args.image_pattern}.")
-
-     for filepath in filepaths:
-         logging.info(f"Reading image {filepath} ...")
-         image = read_image(filepath)
-         image = resize_image(image, short_size=args.short_size)
-         batch_image = np.expand_dims(image, axis=0)
-
-         logging.info("Invoking the autoencoder model ...")
-         output_image = autoencoder(batch_image)[0]
-
-         output_filepath = get_output_filepath(filepath, output_dir=args.output_dir)
-         logging.info(f"Outputting {output_filepath} ...")
-         write_image(output_filepath, output_image)
-
-         if args.save_input:
-             ext = os.path.splitext(output_filepath)[-1]
-             input_filepath = output_filepath.replace(ext, "_input" + ext)
-             write_image(input_filepath, image)
-
-
- @logging.catch(reraise=True)
- def main() -> None:
-     _run_eval()
-
-
- if __name__ == "__main__":
-     main()
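
A quick sanity check of what --short_size does (plain arithmetic, mirroring the rounding rule in resize_image from the inference utils): the short side becomes short_size, the long side scales proportionally, and odd results are bumped to the next even number.

    h, w, short = 480, 640, 512                     # a 480x640 input with --short_size 512
    h_new, w_new = short, int(w * short / h + 0.5)  # long side: 640 * 512 / 480 -> 683
    w_new += w_new % 2                              # keep dimensions even: 683 -> 684
    print(h_new, w_new)                             # 512 684
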
cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/image_lib.py DELETED
@@ -1,124 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- """A library for image tokenizers inference."""
-
- from typing import Any
-
- import numpy as np
- import torch
-
- from cosmos_transfer1.auxiliary.tokenizer.inference.utils import (
-     load_decoder_model,
-     load_encoder_model,
-     load_model,
-     numpy2tensor,
-     pad_image_batch,
-     tensor2numpy,
-     unpad_image_batch,
- )
-
-
- class ImageTokenizer(torch.nn.Module):
-     def __init__(
-         self,
-         checkpoint: str = None,
-         checkpoint_enc: str = None,
-         checkpoint_dec: str = None,
-         tokenizer_config: dict[str, Any] = None,
-         device: str = "cuda",
-         dtype: str = "bfloat16",
-     ) -> None:
-         super().__init__()
-         self._device = device
-         self._dtype = getattr(torch, dtype)
-         self._full_model = (
-             load_model(checkpoint, tokenizer_config, device).to(self._dtype) if checkpoint is not None else None
-         )
-         self._enc_model = (
-             load_encoder_model(checkpoint_enc, tokenizer_config, device).to(self._dtype)
-             if checkpoint_enc is not None
-             else None
-         )
-         self._dec_model = (
-             load_decoder_model(checkpoint_dec, tokenizer_config, device).to(self._dtype)
-             if checkpoint_dec is not None
-             else None
-         )
-
-     @torch.no_grad()
-     def autoencode(self, input_tensor: torch.Tensor) -> torch.Tensor:
-         """Reconstructs a batch of image tensors after embedding into a latent.
-
-         Args:
-             input_tensor: The input image, Bx3xHxW layout, range [-1..1].
-         Returns:
-             The reconstructed tensor, layout Bx3xHxW, range [-1..1].
-         """
-         if self._full_model is not None:
-             output_tensor = self._full_model(input_tensor)
-             output_tensor = output_tensor[0] if isinstance(output_tensor, tuple) else output_tensor
-         else:
-             output_latent = self.encode(input_tensor)[0]
-             output_tensor = self.decode(output_latent)
-         return output_tensor
-
-     @torch.no_grad()
-     def decode(self, input_latent: torch.Tensor) -> torch.Tensor:
-         """Decodes an image from a provided latent embedding.
-
-         Args:
-             input_latent: The continuous latent Bx16xhxw for CI,
-                 or the discrete indices Bxhxw for DI.
-         Returns:
-             The output tensor in Bx3xHxW, range [-1..1].
-         """
-         return self._dec_model(input_latent)
-
-     @torch.no_grad()
-     def encode(self, input_tensor: torch.Tensor) -> torch.Tensor | tuple[torch.Tensor, ...]:
-         """Encodes an image into a latent embedding or code.
-
-         Args:
-             input_tensor: The input tensor, Bx3xHxW layout, range [-1..1].
-         Returns:
-             For the continuous image (CI) tokenizer, the tuple contains:
-                 - The latent embedding, Bx16x(h)x(w), where the compression
-                   rate is (H/h x W/w), and channel dimension of 16.
-             For the discrete image (DI) tokenizer, the tuple contains:
-                 - The indices, Bx(h)x(w), from a codebook of size 64K, which
-                   corresponds to FSQ levels of (8,8,8,5,5,5).
-                 - The discrete code, Bx6x(h)x(w), where the compression rate is
-                   again (H/h x W/w), and channel dimension of 6.
-         """
-         output_latent = self._enc_model(input_tensor)
-         if isinstance(output_latent, torch.Tensor):
-             return output_latent
-         return output_latent[:-1]
-
-     @torch.no_grad()
-     def forward(self, image: np.ndarray) -> np.ndarray:
-         """Reconstructs an image using a pre-trained tokenizer.
-
-         Args:
-             image: The input image, BxHxWxC layout, range [0..255].
-         Returns:
-             The reconstructed image in range [0..255], layout BxHxWxC.
-         """
-         padded_input_image, crop_region = pad_image_batch(image)
-         input_tensor = numpy2tensor(padded_input_image, dtype=self._dtype, device=self._device)
-         output_tensor = self.autoencode(input_tensor)
-         padded_output_image = tensor2numpy(output_tensor)
-         return unpad_image_batch(padded_output_image, crop_region)
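
For context, a minimal usage sketch of this class (the checkpoint paths follow the <model-name> placeholder convention from the CLI docstrings; illustrative, not a tested invocation):

    import numpy as np
    from cosmos_transfer1.auxiliary.tokenizer.inference.image_lib import ImageTokenizer

    tokenizer = ImageTokenizer(
        checkpoint_enc="./checkpoints/<model-name>/encoder.jit",
        checkpoint_dec="./checkpoints/<model-name>/decoder.jit",
    )
    batch = np.random.randint(0, 256, size=(1, 512, 512, 3), dtype=np.uint8)  # BxHxWxC, [0..255]
    reconstruction = tokenizer(batch)  # forward() pads, encodes, decodes, and unpads
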
cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/utils.py DELETED
@@ -1,402 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- """Utility functions for the inference libraries."""
-
- import os
- from glob import glob
- from typing import Any
-
- import mediapy as media
- import numpy as np
- import torch
-
- from cosmos_transfer1.auxiliary.tokenizer.networks import TokenizerModels
-
- _DTYPE, _DEVICE = torch.bfloat16, "cuda"
- _UINT8_MAX_F = float(torch.iinfo(torch.uint8).max)
- _SPATIAL_ALIGN = 16
- _TEMPORAL_ALIGN = 8
-
-
- def load_model(
-     jit_filepath: str = None,
-     tokenizer_config: dict[str, Any] = None,
-     device: str = "cuda",
- ) -> torch.nn.Module | torch.jit.ScriptModule:
-     """Loads a full tokenizer model from a filepath.
-
-     Args:
-         jit_filepath: The filepath to the JIT-compiled model.
-         tokenizer_config: The tokenizer config; if None, the file is loaded as a plain JIT model.
-         device: The device to load the model onto, default=cuda.
-     Returns:
-         The model loaded to device, in eval mode.
-     """
-     if tokenizer_config is None:
-         return load_jit_model(jit_filepath, device)
-     full_model, ckpts = _load_pytorch_model(jit_filepath, tokenizer_config, device)
-     full_model.load_state_dict(ckpts.state_dict(), strict=False)
-     return full_model.eval().to(device)
-
-
- def load_encoder_model(
-     jit_filepath: str = None,
-     tokenizer_config: dict[str, Any] = None,
-     device: str = "cuda",
- ) -> torch.nn.Module | torch.jit.ScriptModule:
-     """Loads the encoder-only model from a filepath.
-
-     Args:
-         jit_filepath: The filepath to the JIT-compiled model.
-         tokenizer_config: The tokenizer config; if None, the file is loaded as a plain JIT model.
-         device: The device to load the model onto, default=cuda.
-     Returns:
-         The encoder model loaded to device, in eval mode.
-     """
-     if tokenizer_config is None:
-         return load_jit_model(jit_filepath, device)
-     full_model, ckpts = _load_pytorch_model(jit_filepath, tokenizer_config, device)
-     encoder_model = full_model.encoder_jit()
-     encoder_model.load_state_dict(ckpts.state_dict(), strict=False)
-     return encoder_model.eval().to(device)
-
-
- def load_decoder_model(
-     jit_filepath: str = None,
-     tokenizer_config: dict[str, Any] = None,
-     device: str = "cuda",
- ) -> torch.nn.Module | torch.jit.ScriptModule:
-     """Loads the decoder-only model from a filepath.
-
-     Args:
-         jit_filepath: The filepath to the JIT-compiled model.
-         tokenizer_config: The tokenizer config; if None, the file is loaded as a plain JIT model.
-         device: The device to load the model onto, default=cuda.
-     Returns:
-         The decoder model loaded to device, in eval mode.
-     """
-     if tokenizer_config is None:
-         return load_jit_model(jit_filepath, device)
-     full_model, ckpts = _load_pytorch_model(jit_filepath, tokenizer_config, device)
-     decoder_model = full_model.decoder_jit()
-     decoder_model.load_state_dict(ckpts.state_dict(), strict=False)
-     return decoder_model.eval().to(device)
-
-
- def _load_pytorch_model(
-     jit_filepath: str = None, tokenizer_config: dict[str, Any] = None, device: str = "cuda"
- ) -> tuple[torch.nn.Module, torch.jit.ScriptModule]:
-     """Instantiates a native PyTorch model and loads the matching JIT checkpoint.
-
-     Args:
-         jit_filepath: The filepath to the JIT-compiled model.
-         tokenizer_config: The tokenizer config, including the model name.
-         device: The device to load the checkpoint onto, default=cuda.
-     Returns:
-         The (uninitialized) PyTorch model and the JIT module holding the weights.
-     """
-     tokenizer_name = tokenizer_config["name"]
-     model = TokenizerModels[tokenizer_name].value(**tokenizer_config)
-     ckpts = torch.jit.load(jit_filepath, map_location=device)
-     return model, ckpts
-
-
- def load_jit_model(jit_filepath: str = None, device: str = "cuda") -> torch.jit.ScriptModule:
-     """Loads a torch.jit.ScriptModule from a filepath.
-
-     Args:
-         jit_filepath: The filepath to the JIT-compiled model.
-         device: The device to load the model onto, default=cuda.
-     Returns:
-         The JIT compiled model loaded to device, in eval mode.
-     """
-     model = torch.jit.load(jit_filepath, map_location=device)
-     return model.eval().to(device)
-
-
- def save_jit_model(
-     model: torch.jit.ScriptModule | torch.jit.RecursiveScriptModule = None,
-     jit_filepath: str = None,
- ) -> None:
-     """Saves a torch.jit.ScriptModule or torch.jit.RecursiveScriptModule to file.
-
-     Args:
-         model: JIT compiled model loaded onto `config.checkpoint.jit.device`.
-         jit_filepath: The filepath to the JIT-compiled model.
-     """
-     torch.jit.save(model, jit_filepath)
-
-
- def get_filepaths(input_pattern) -> list[str]:
-     """Returns a sorted list of unique filepaths matching a glob pattern."""
-     filepaths = glob(str(input_pattern))
-     # Deduplicate first, then sort, so the result is deterministic.
-     return sorted(set(filepaths))
-
-
- def get_output_filepath(filepath: str, output_dir: str = None) -> str:
-     """Returns the output filepath for the given input filepath."""
-     output_dir = output_dir or f"{os.path.dirname(filepath)}/reconstructions"
-     output_filepath = f"{output_dir}/{os.path.basename(filepath)}"
-     os.makedirs(output_dir, exist_ok=True)
-     return output_filepath
-
-
- def read_image(filepath: str) -> np.ndarray:
-     """Reads an image from a filepath.
-
-     Args:
-         filepath: The filepath to the image.
-
-     Returns:
-         The image as a numpy array, layout HxWxC, range [0..255], uint8 dtype.
-     """
-     image = media.read_image(filepath)
-     # Convert a grayscale image to RGB, since our tokenizers always assume a 3-channel RGB image.
-     if image.ndim == 2:
-         image = np.stack([image] * 3, axis=-1)
-     # Convert RGBA to RGB.
-     if image.shape[-1] == 4:
-         image = image[..., :3]
-     return image
-
-
- def read_video(filepath: str) -> np.ndarray:
-     """Reads a video from a filepath.
-
-     Args:
-         filepath: The filepath to the video.
-     Returns:
-         The video as a numpy array, layout TxHxWxC, range [0..255], uint8 dtype.
-     """
-     video = media.read_video(filepath)
-     # Convert grayscale frames to RGB, since our tokenizers always assume 3-channel video.
-     if video.ndim == 3:
-         video = np.stack([video] * 3, axis=-1)
-     # Convert RGBA to RGB.
-     if video.shape[-1] == 4:
-         video = video[..., :3]
-     return video
-
-
- def resize_image(image: np.ndarray, short_size: int = None) -> np.ndarray:
-     """Resizes an image so that its short side equals `short_size`.
-
-     Args:
-         image: The image to resize, layout HxWxC, of any range.
-         short_size: The target size of the short side.
-     Returns:
-         The resized image.
-     """
-     if short_size is None:
-         return image
-     height, width = image.shape[-3:-1]
-     if height <= width:
-         height_new, width_new = short_size, int(width * short_size / height + 0.5)
-         width_new = width_new if width_new % 2 == 0 else width_new + 1
-     else:
-         height_new, width_new = (
-             int(height * short_size / width + 0.5),
-             short_size,
-         )
-         height_new = height_new if height_new % 2 == 0 else height_new + 1
-     return media.resize_image(image, shape=(height_new, width_new))
-
-
- def resize_video(video: np.ndarray, short_size: int = None) -> np.ndarray:
-     """Resizes a video so that its short side equals `short_size`.
-
-     Args:
-         video: The video to resize, layout TxHxWxC, of any range.
-         short_size: The target size of the short side.
-     Returns:
-         The resized video.
-     """
-     if short_size is None:
-         return video
-     height, width = video.shape[-3:-1]
-     if height <= width:
-         height_new, width_new = short_size, int(width * short_size / height + 0.5)
-         width_new = width_new if width_new % 2 == 0 else width_new + 1
-     else:
-         height_new, width_new = (
-             int(height * short_size / width + 0.5),
-             short_size,
-         )
-         height_new = height_new if height_new % 2 == 0 else height_new + 1
-     return media.resize_video(video, shape=(height_new, width_new))
-
-
- def write_image(filepath: str, image: np.ndarray):
-     """Writes an image to a filepath."""
-     return media.write_image(filepath, image)
-
-
- def write_video(filepath: str, video: np.ndarray, fps: int = 24) -> None:
-     """Writes a video to a filepath."""
-     return media.write_video(filepath, video, fps=fps)
-
-
- def numpy2tensor(
-     input_image: np.ndarray,
-     dtype: torch.dtype = _DTYPE,
-     device: str = _DEVICE,
-     range_min: int = -1,
- ) -> torch.Tensor:
-     """Converts a uint8 image in range [0..255] to a `dtype` tensor in range [-1..1].
-
-     Args:
-         input_image: A batch of images in range [0..255], BxHxWx3 layout.
-     Returns:
-         A torch.Tensor of layout Bx3xHxW in range [-1..1], of `dtype`.
-     """
-     ndim = input_image.ndim
-     indices = list(range(1, ndim))[-1:] + list(range(1, ndim))[:-1]
-     image = input_image.transpose((0,) + tuple(indices)) / _UINT8_MAX_F
-     if range_min == -1:
-         image = 2.0 * image - 1.0
-     return torch.from_numpy(image).to(dtype).to(device)
-
-
- def tensor2numpy(input_tensor: torch.Tensor, range_min: int = -1) -> np.ndarray:
-     """Converts a tensor in [-1..1] to a uint8 image in range [0..255].
-
-     Args:
-         input_tensor: Input image tensor of Bx3xHxW layout, range [-1..1].
-     Returns:
-         A numpy image of layout BxHxWx3, range [0..255], uint8 dtype.
-     """
-     if range_min == -1:
-         input_tensor = (input_tensor.float() + 1.0) / 2.0
-     ndim = input_tensor.ndim
-     output_image = input_tensor.clamp(0, 1).cpu().numpy()
-     output_image = output_image.transpose((0,) + tuple(range(2, ndim)) + (1,))
-     return (output_image * _UINT8_MAX_F + 0.5).astype(np.uint8)
-
-
- def pad_image_batch(batch: np.ndarray, spatial_align: int = _SPATIAL_ALIGN) -> tuple[np.ndarray, list[int]]:
-     """Pads a batch of images to be divisible by `spatial_align`.
-
-     Args:
-         batch: The batch of images to pad, layout BxHxWx3, in any range.
-         spatial_align: The spatial alignment to pad to.
-     Returns:
-         The padded batch and the crop region.
-     """
-     height, width = batch.shape[1:3]
-     align = spatial_align
-     height_to_pad = (align - height % align) if height % align != 0 else 0
-     width_to_pad = (align - width % align) if width % align != 0 else 0
-
-     crop_region = [
-         height_to_pad >> 1,
-         width_to_pad >> 1,
-         height + (height_to_pad >> 1),
-         width + (width_to_pad >> 1),
-     ]
-     batch = np.pad(
-         batch,
-         (
-             (0, 0),
-             (height_to_pad >> 1, height_to_pad - (height_to_pad >> 1)),
-             (width_to_pad >> 1, width_to_pad - (width_to_pad >> 1)),
-             (0, 0),
-         ),
-         mode="constant",
-     )
-     return batch, crop_region
-
-
- def pad_video_batch(
-     batch: np.ndarray,
-     temporal_align: int = _TEMPORAL_ALIGN,
-     spatial_align: int = _SPATIAL_ALIGN,
- ) -> tuple[np.ndarray, list[int]]:
-     """Pads a batch of videos to be divisible by `temporal_align` and `spatial_align`.
-
-     Zero-pads spatially; edge-pads (replicates) temporally to better respect causality.
-     Args:
-         batch: The batch of videos to pad, layout BxFxHxWx3, in any range.
-         temporal_align: The temporal alignment to pad to.
-         spatial_align: The spatial alignment to pad to.
-     Returns:
-         The padded batch and the crop region.
-     """
-     num_frames, height, width = batch.shape[-4:-1]
-     align = spatial_align
-     height_to_pad = (align - height % align) if height % align != 0 else 0
-     width_to_pad = (align - width % align) if width % align != 0 else 0
-
-     align = temporal_align
-     frames_to_pad = (align - (num_frames - 1) % align) if (num_frames - 1) % align != 0 else 0
-
-     crop_region = [
-         frames_to_pad >> 1,
-         height_to_pad >> 1,
-         width_to_pad >> 1,
-         num_frames + (frames_to_pad >> 1),
-         height + (height_to_pad >> 1),
-         width + (width_to_pad >> 1),
-     ]
-     batch = np.pad(
-         batch,
-         (
-             (0, 0),
-             (0, 0),
-             (height_to_pad >> 1, height_to_pad - (height_to_pad >> 1)),
-             (width_to_pad >> 1, width_to_pad - (width_to_pad >> 1)),
-             (0, 0),
-         ),
-         mode="constant",
-     )
-     batch = np.pad(
-         batch,
-         (
-             (0, 0),
-             (frames_to_pad >> 1, frames_to_pad - (frames_to_pad >> 1)),
-             (0, 0),
-             (0, 0),
-             (0, 0),
-         ),
-         mode="edge",
-     )
-     return batch, crop_region
-
-
- def unpad_video_batch(batch: np.ndarray, crop_region: list[int]) -> np.ndarray:
-     """Unpads a video batch with `crop_region`.
-
-     Args:
-         batch: A batch of numpy videos, layout BxFxHxWxC.
-         crop_region: [f1,y1,x1,f2,y2,x2] first, top, left, last, bottom, right crop indices.
-
-     Returns:
-         np.ndarray: Cropped numpy video, layout BxFxHxWxC.
-     """
-     assert len(crop_region) == 6, "crop_region should have length 6."
-     f1, y1, x1, f2, y2, x2 = crop_region
-     return batch[..., f1:f2, y1:y2, x1:x2, :]
-
-
- def unpad_image_batch(batch: np.ndarray, crop_region: list[int]) -> np.ndarray:
-     """Unpads an image batch with `crop_region`.
-
-     Args:
-         batch: A batch of numpy images, layout BxHxWxC.
-         crop_region: [y1,x1,y2,x2] top, left, bottom, right crop indices.
-
-     Returns:
-         np.ndarray: Cropped numpy image, layout BxHxWxC.
-     """
-     assert len(crop_region) == 4, "crop_region should have length 4."
-     y1, x1, y2, x2 = crop_region
-     return batch[..., y1:y2, x1:x2, :]
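
A worked example of the spatial padding arithmetic (assuming this module is importable): a 100x130 image under the default _SPATIAL_ALIGN of 16 gains 12 rows and 14 columns, split as evenly as the right-shift allows.

    import numpy as np
    from cosmos_transfer1.auxiliary.tokenizer.inference.utils import pad_image_batch, unpad_image_batch

    batch = np.zeros((1, 100, 130, 3), dtype=np.uint8)
    padded, crop = pad_image_batch(batch)  # 100 -> 112 (pad 12), 130 -> 144 (pad 14)
    print(padded.shape)                    # (1, 112, 144, 3)
    print(crop)                            # [6, 7, 106, 137]: top, left, bottom, right
    assert unpad_image_batch(padded, crop).shape == batch.shape
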
cosmos-transfer1/cosmos_transfer1/auxiliary/tokenizer/inference/video_cli.py DELETED
@@ -1,210 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- """A CLI to run CausalVideoTokenizer on plain videos based on torch.jit.
-
- Usage:
-     python3 -m cosmos_transfer1.auxiliary.tokenizer.inference.video_cli \
-         --video_pattern 'path/to/video/samples/*.mp4' \
-         --output_dir ./reconstructions \
-         --checkpoint_enc ./checkpoints/<model-name>/encoder.jit \
-         --checkpoint_dec ./checkpoints/<model-name>/decoder.jit
-
-     Optionally, you can run the model in pure PyTorch mode:
-     python3 -m cosmos_transfer1.auxiliary.tokenizer.inference.video_cli \
-         --video_pattern 'path/to/video/samples/*.mp4' \
-         --mode=torch \
-         --tokenizer_type=CV \
-         --temporal_compression=4 \
-         --spatial_compression=8 \
-         --checkpoint_enc ./checkpoints/<model-name>/encoder.jit \
-         --checkpoint_dec ./checkpoints/<model-name>/decoder.jit
- """
-
- import os
- import sys
- from argparse import ArgumentParser, Namespace
-
- import numpy as np
- from loguru import logger as logging
-
- from cosmos_transfer1.auxiliary.tokenizer.inference.utils import (
-     get_filepaths,
-     get_output_filepath,
-     read_video,
-     resize_video,
-     write_video,
- )
- from cosmos_transfer1.auxiliary.tokenizer.inference.video_lib import CausalVideoTokenizer
- from cosmos_transfer1.auxiliary.tokenizer.networks import TokenizerConfigs
-
-
- def _parse_args() -> Namespace:
-     parser = ArgumentParser(description="A CLI for CausalVideoTokenizer.")
-     parser.add_argument(
-         "--video_pattern",
-         type=str,
-         default="path/to/videos/*.mp4",
-         help="Glob pattern.",
-     )
-     parser.add_argument(
-         "--checkpoint",
-         type=str,
-         default=None,
-         help="JIT full Autoencoder model filepath.",
-     )
-     parser.add_argument(
-         "--checkpoint_enc",
-         type=str,
-         default=None,
-         help="JIT Encoder model filepath.",
-     )
-     parser.add_argument(
-         "--checkpoint_dec",
-         type=str,
-         default=None,
-         help="JIT Decoder model filepath.",
-     )
-     parser.add_argument(
-         "--tokenizer_type",
-         type=str,
-         choices=["CV", "DV"],
-         help="Specifies the tokenizer type.",
-     )
-     parser.add_argument(
-         "--spatial_compression",
-         type=int,
-         choices=[8, 16],
-         default=8,
-         help="The spatial compression factor.",
-     )
-     parser.add_argument(
-         "--temporal_compression",
-         type=int,
-         choices=[4, 8],
-         default=4,
-         help="The temporal compression factor.",
-     )
-     parser.add_argument(
-         "--mode",
-         type=str,
-         choices=["torch", "jit"],
-         default="jit",
-         help="Specify the backend: native 'torch' or 'jit' (default: 'jit')",
-     )
-     parser.add_argument(
-         "--short_size",
-         type=int,
-         default=None,
-         help="The size to resample inputs. None, by default.",
-     )
-     parser.add_argument(
-         "--temporal_window",
-         type=int,
-         default=17,
-         help="The temporal window to operate at a time.",
-     )
-     parser.add_argument(
-         "--dtype",
-         type=str,
-         default="bfloat16",
-         help="Sets the precision, default bfloat16.",
-     )
-     parser.add_argument(
-         "--device",
-         type=str,
-         default="cuda",
-         help="Device for invoking the model.",
-     )
-     parser.add_argument("--output_dir", type=str, default=None, help="Output directory.")
-     parser.add_argument(
-         "--output_fps",
-         type=float,
-         default=24.0,
-         help="Output frames-per-second (FPS).",
-     )
-     parser.add_argument(
-         "--save_input",
-         action="store_true",
-         help="If on, the input video will be output too.",
-     )
-
-     args = parser.parse_args()
-     return args
-
-
- logging.info("Initializing args ...")
- args = _parse_args()
- if args.mode == "torch" and args.tokenizer_type not in ["CV", "DV"]:
-     logging.error("'torch' backend requires the tokenizer_type of 'CV' or 'DV'.")
-     sys.exit(1)
-
-
- def _run_eval() -> None:
-     """Invokes the JIT-compiled CausalVideoTokenizer on an input video."""
-
-     if args.checkpoint_enc is None and args.checkpoint_dec is None and args.checkpoint is None:
-         logging.warning("Aborting. Provide either both encoder and decoder JIT models, or the full autoencoder JIT model.")
-         return
-
-     if args.mode == "torch":
-         tokenizer_config = TokenizerConfigs[args.tokenizer_type].value
-         tokenizer_config.update(dict(spatial_compression=args.spatial_compression))
-         tokenizer_config.update(dict(temporal_compression=args.temporal_compression))
-     else:
-         tokenizer_config = None
-
-     logging.info(
-         f"Loading a torch.jit model `{os.path.dirname(args.checkpoint or args.checkpoint_enc or args.checkpoint_dec)}` ..."
-     )
-     autoencoder = CausalVideoTokenizer(
-         checkpoint=args.checkpoint,
-         checkpoint_enc=args.checkpoint_enc,
-         checkpoint_dec=args.checkpoint_dec,
-         tokenizer_config=tokenizer_config,
-         device=args.device,
-         dtype=args.dtype,
-     )
-
-     logging.info(f"Looking for files matching video_pattern={args.video_pattern} ...")
-     filepaths = get_filepaths(args.video_pattern)
-     logging.info(f"Found {len(filepaths)} videos from {args.video_pattern}.")
-
-     for filepath in filepaths:
-         logging.info(f"Reading video {filepath} ...")
-         video = read_video(filepath)
-         video = resize_video(video, short_size=args.short_size)
-
-         logging.info("Invoking the autoencoder model ...")
-         batch_video = video[np.newaxis, ...]
-         output_video = autoencoder(batch_video, temporal_window=args.temporal_window)[0]
-         logging.info("Constructing output filepath ...")
-         output_filepath = get_output_filepath(filepath, output_dir=args.output_dir)
-         logging.info(f"Outputting {output_filepath} ...")
-         write_video(output_filepath, output_video, fps=args.output_fps)
-         if args.save_input:
-             ext = os.path.splitext(output_filepath)[-1]
-             input_filepath = output_filepath.replace(ext, "_input" + ext)
-             write_video(input_filepath, video, fps=args.output_fps)
-
-
- @logging.catch(reraise=True)
- def main() -> None:
-     _run_eval()
-
-
- if __name__ == "__main__":
-     main()
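
One detail worth spelling out: with a causal video tokenizer, a window of T frames typically maps to 1 + (T - 1) / temporal_compression latent frames (the first frame passes through, the rest are compressed), which is consistent with the (num_frames - 1) % align padding in pad_video_batch. Under that assumption, the default --temporal_window 17 divides cleanly for both supported compression factors:

    def latent_frames(num_frames: int, temporal_compression: int) -> int:
        # Causal tokenization: keep the first frame, compress the remaining T - 1.
        return 1 + (num_frames - 1) // temporal_compression

    print(latent_frames(17, 4))  # 5
    print(latent_frames(17, 8))  # 3
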