Commit 70c420e
unpairedelectron07 committed
Parent(s): eb6b37d

Upload 3 files
Files changed:
- audiocraft/environment.py +176 -0
- audiocraft/py.typed +0 -0
- audiocraft/train.py +163 -0
audiocraft/environment.py
ADDED
@@ -0,0 +1,176 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

"""
Provides cluster and tools configuration across clusters (slurm, dora, utilities).
"""

import logging
import os
from pathlib import Path
import re
import typing as tp

import omegaconf

from .utils.cluster import _guess_cluster_type


logger = logging.getLogger(__name__)


class AudioCraftEnvironment:
    """Environment configuration for teams and clusters.

    AudioCraftEnvironment picks compute cluster settings (slurm, dora) from the current running environment
    or declared variable and the loaded team configuration. Additionally, the AudioCraftEnvironment
    provides pointers to a reference folder resolved automatically across clusters that is shared across team members,
    allowing to share sigs or other files to run jobs. Finally, it provides dataset mappers to automatically
    map dataset file paths to new locations across clusters, allowing to use the same manifest of files across clusters.

    The cluster type is identified automatically and base configuration file is read from config/teams.yaml.
    Use the following environment variables to specify the cluster, team or configuration:

        AUDIOCRAFT_CLUSTER (optional): Cluster type to enforce. Useful if the cluster type
            cannot be inferred automatically.
        AUDIOCRAFT_CONFIG (optional): Path to yaml config holding the teams configuration.
            If not set, configuration is read from config/teams.yaml.
        AUDIOCRAFT_TEAM (optional): Name of the team. Recommended to set to your own team.
            Cluster configurations are shared across teams to match compute allocation,
            specify your cluster configuration in the configuration file under a key mapping
            your team name.
    """
    _instance = None
    DEFAULT_TEAM = "default"

    def __init__(self) -> None:
        """Loads configuration."""
        self.team: str = os.getenv("AUDIOCRAFT_TEAM", self.DEFAULT_TEAM)
        cluster_type = _guess_cluster_type()
        cluster = os.getenv(
            "AUDIOCRAFT_CLUSTER", cluster_type.value
        )
        logger.info("Detecting cluster type %s", cluster_type)

        self.cluster: str = cluster

        config_path = os.getenv(
            "AUDIOCRAFT_CONFIG",
            Path(__file__)
            .parent.parent.joinpath("config/teams", self.team)
            .with_suffix(".yaml"),
        )
        self.config = omegaconf.OmegaConf.load(config_path)
        self._dataset_mappers = []
        cluster_config = self._get_cluster_config()
        if "dataset_mappers" in cluster_config:
            for pattern, repl in cluster_config["dataset_mappers"].items():
                regex = re.compile(pattern)
                self._dataset_mappers.append((regex, repl))

    def _get_cluster_config(self) -> omegaconf.DictConfig:
        assert isinstance(self.config, omegaconf.DictConfig)
        return self.config[self.cluster]

    @classmethod
    def instance(cls):
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def reset(cls):
        """Clears the environment and forces a reload on next invocation."""
        cls._instance = None

    @classmethod
    def get_team(cls) -> str:
        """Gets the selected team as dictated by the AUDIOCRAFT_TEAM env var.
        If not defined, defaults to "default".
        """
        return cls.instance().team

    @classmethod
    def get_cluster(cls) -> str:
        """Gets the detected cluster.
        This value can be overridden by the AUDIOCRAFT_CLUSTER env var.
        """
        return cls.instance().cluster

    @classmethod
    def get_dora_dir(cls) -> Path:
        """Gets the path to the dora directory for the current team and cluster.
        Value is overridden by the AUDIOCRAFT_DORA_DIR env var.
        """
        cluster_config = cls.instance()._get_cluster_config()
        dora_dir = os.getenv("AUDIOCRAFT_DORA_DIR", cluster_config["dora_dir"])
        logger.warning(f"Dora directory: {dora_dir}")
        return Path(dora_dir)

    @classmethod
    def get_reference_dir(cls) -> Path:
        """Gets the path to the reference directory for the current team and cluster.
        Value is overridden by the AUDIOCRAFT_REFERENCE_DIR env var.
        """
        cluster_config = cls.instance()._get_cluster_config()
        return Path(os.getenv("AUDIOCRAFT_REFERENCE_DIR", cluster_config["reference_dir"]))

    @classmethod
    def get_slurm_exclude(cls) -> tp.Optional[str]:
        """Get the list of nodes to exclude for that cluster."""
        cluster_config = cls.instance()._get_cluster_config()
        return cluster_config.get("slurm_exclude")

    @classmethod
    def get_slurm_partitions(cls, partition_types: tp.Optional[tp.List[str]] = None) -> str:
        """Gets the requested partitions for the current team and cluster as a comma-separated string.

        Args:
            partition_types (list[str], optional): partition types to retrieve. Values must be
                from ['global', 'team']. If not provided, the global partition is returned.
        """
        if not partition_types:
            partition_types = ["global"]

        cluster_config = cls.instance()._get_cluster_config()
        partitions = [
            cluster_config["partitions"][partition_type]
            for partition_type in partition_types
        ]
        return ",".join(partitions)

    @classmethod
    def resolve_reference_path(cls, path: tp.Union[str, Path]) -> Path:
        """Converts reference placeholder in path with configured reference dir to resolve paths.

        Args:
            path (str or Path): Path to resolve.
        Returns:
            Path: Resolved path.
        """
        path = str(path)

        if path.startswith("//reference"):
            reference_dir = cls.get_reference_dir()
            logger.warn(f"Reference directory: {reference_dir}")
            assert (
                reference_dir.exists() and reference_dir.is_dir()
            ), f"Reference directory does not exist: {reference_dir}."
            path = re.sub("^//reference", str(reference_dir), path)

        return Path(path)

    @classmethod
    def apply_dataset_mappers(cls, path: str) -> str:
        """Applies dataset mapping regex rules as defined in the configuration.
        If no rules are defined, the path is returned as-is.
        """
        instance = cls.instance()

        for pattern, repl in instance._dataset_mappers:
            path = pattern.sub(repl, path)

        return path
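For orientation, everything in the file above is driven by environment variables plus the team YAML config. The short sketch below is only an illustration: it assumes audiocraft is importable, that config/teams/default.yaml exists, and that it declares an entry for the cluster key used here ("local" is an illustrative name, not a guaranteed value of the detected cluster type).

# Usage sketch for AudioCraftEnvironment (illustrative only).
# Assumes config/teams/default.yaml contains a "local" cluster entry with
# dora_dir and reference_dir keys; adjust the names to your own setup.
import os

os.environ["AUDIOCRAFT_TEAM"] = "default"    # selects config/teams/default.yaml
os.environ["AUDIOCRAFT_CLUSTER"] = "local"   # bypasses automatic cluster detection

from audiocraft.environment import AudioCraftEnvironment

print(AudioCraftEnvironment.get_team())      # -> "default"
print(AudioCraftEnvironment.get_cluster())   # -> "local"
print(AudioCraftEnvironment.get_dora_dir())  # dora_dir declared for that team/cluster

# Paths starting with //reference are rewritten against the configured
# reference_dir; the directory must exist or resolve_reference_path asserts.
# The path below is a placeholder, not a real manifest.
print(AudioCraftEnvironment.resolve_reference_path("//reference/manifests/train.jsonl"))

Note that the configuration is cached in a singleton, so AudioCraftEnvironment.reset() must be called to force a reload after changing these environment variables.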
audiocraft/py.typed
ADDED
Empty file (the py.typed marker contains no content)
audiocraft/train.py
ADDED
@@ -0,0 +1,163 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

"""
Entry point for dora to launch solvers for running training loops.
See more info on how to use dora: https://github.com/facebookresearch/dora
"""

import logging
import multiprocessing
import os
from pathlib import Path
import sys
import typing as tp

from dora import git_save, hydra_main, XP
import flashy
import hydra
import omegaconf

from .environment import AudioCraftEnvironment
from .utils.cluster import get_slurm_parameters

logger = logging.getLogger(__name__)


def resolve_config_dset_paths(cfg):
    """Enable Dora to load manifest from git clone repository."""
    # manifest files for the different splits
    for key, value in cfg.datasource.items():
        if isinstance(value, str):
            cfg.datasource[key] = git_save.to_absolute_path(value)


def get_solver(cfg):
    from . import solvers
    # Convert batch size to batch size for each GPU
    assert cfg.dataset.batch_size % flashy.distrib.world_size() == 0
    cfg.dataset.batch_size //= flashy.distrib.world_size()
    for split in ['train', 'valid', 'evaluate', 'generate']:
        if hasattr(cfg.dataset, split) and hasattr(cfg.dataset[split], 'batch_size'):
            assert cfg.dataset[split].batch_size % flashy.distrib.world_size() == 0
            cfg.dataset[split].batch_size //= flashy.distrib.world_size()
    resolve_config_dset_paths(cfg)
    solver = solvers.get_solver(cfg)
    return solver


def get_solver_from_xp(xp: XP, override_cfg: tp.Optional[tp.Union[dict, omegaconf.DictConfig]] = None,
                       restore: bool = True, load_best: bool = True,
                       ignore_state_keys: tp.List[str] = [], disable_fsdp: bool = True):
    """Given a XP, return the Solver object.

    Args:
        xp (XP): Dora experiment for which to retrieve the solver.
        override_cfg (dict or None): If not None, should be a dict used to
            override some values in the config of `xp`. This will not impact
            the XP signature or folder. The format is different
            than the one used in Dora grids, nested keys should actually be nested dicts,
            not flattened, e.g. `{'optim': {'batch_size': 32}}`.
        restore (bool): If `True` (the default), restore state from the last checkpoint.
        load_best (bool): If `True` (the default), load the best state from the checkpoint.
        ignore_state_keys (list[str]): List of sources to ignore when loading the state, e.g. `optimizer`.
        disable_fsdp (bool): if True, disables FSDP entirely. This will
            also automatically skip loading the EMA. For solver specific
            state sources, like the optimizer, you might want to
            use along `ignore_state_keys=['optimizer']`. Must be used with `load_best=True`.
    """
    logger.info(f"Loading solver from XP {xp.sig}. "
                f"Overrides used: {xp.argv}")
    cfg = xp.cfg
    if override_cfg is not None:
        cfg = omegaconf.OmegaConf.merge(cfg, omegaconf.DictConfig(override_cfg))
    if disable_fsdp and cfg.fsdp.use:
        cfg.fsdp.use = False
        assert load_best is True
        # ignoring some keys that were FSDP sharded like model, ema, and best_state.
        # fsdp_best_state will be used in that case. When using a specific solver,
        # one is responsible for adding the relevant keys, e.g. 'optimizer'.
        # We could make something to automatically register those inside the solver, but that
        # seem overkill at this point.
        ignore_state_keys = ignore_state_keys + ['model', 'ema', 'best_state']

    try:
        with xp.enter():
            solver = get_solver(cfg)
            if restore:
                solver.restore(load_best=load_best, ignore_state_keys=ignore_state_keys)
        return solver
    finally:
        hydra.core.global_hydra.GlobalHydra.instance().clear()


def get_solver_from_sig(sig: str, *args, **kwargs):
    """Return Solver object from Dora signature, i.e. to play with it from a notebook.
    See `get_solver_from_xp` for more information.
    """
    xp = main.get_xp_from_sig(sig)
    return get_solver_from_xp(xp, *args, **kwargs)


def init_seed_and_system(cfg):
    import numpy as np
    import torch
    import random
    from audiocraft.modules.transformer import set_efficient_attention_backend

    multiprocessing.set_start_method(cfg.mp_start_method)
    logger.debug('Setting mp start method to %s', cfg.mp_start_method)
    random.seed(cfg.seed)
    np.random.seed(cfg.seed)
    # torch also initialize cuda seed if available
    torch.manual_seed(cfg.seed)
    torch.set_num_threads(cfg.num_threads)
    os.environ['MKL_NUM_THREADS'] = str(cfg.num_threads)
    os.environ['OMP_NUM_THREADS'] = str(cfg.num_threads)
    logger.debug('Setting num threads to %d', cfg.num_threads)
    set_efficient_attention_backend(cfg.efficient_attention_backend)
    logger.debug('Setting efficient attention backend to %s', cfg.efficient_attention_backend)
    if 'SLURM_JOB_ID' in os.environ:
        tmpdir = Path('/scratch/slurm_tmpdir/' + os.environ['SLURM_JOB_ID'])
        if tmpdir.exists():
            logger.info("Changing tmpdir to %s", tmpdir)
            os.environ['TMPDIR'] = str(tmpdir)


@hydra_main(config_path='../config', config_name='config', version_base='1.1')
def main(cfg):
    init_seed_and_system(cfg)

    # Setup logging both to XP specific folder, and to stderr.
    log_name = '%s.log.{rank}' % cfg.execute_only if cfg.execute_only else 'solver.log.{rank}'
    flashy.setup_logging(level=str(cfg.logging.level).upper(), log_name=log_name)
    # Initialize distributed training, no need to specify anything when using Dora.
    flashy.distrib.init()
    solver = get_solver(cfg)
    if cfg.show:
        solver.show()
        return

    if cfg.execute_only:
        assert cfg.execute_inplace or cfg.continue_from is not None, \
            "Please explicitly specify the checkpoint to continue from with continue_from=<sig_or_path> " + \
            "when running with execute_only or set execute_inplace to True."
        solver.restore(replay_metrics=False)  # load checkpoint
        solver.run_one_stage(cfg.execute_only)
        return

    return solver.run()


main.dora.dir = AudioCraftEnvironment.get_dora_dir()
main._base_cfg.slurm = get_slurm_parameters(main._base_cfg.slurm)

if main.dora.shared is not None and not os.access(main.dora.shared, os.R_OK):
    print("No read permission on dora.shared folder, ignoring it.", file=sys.stderr)
    main.dora.shared = None

if __name__ == '__main__':
    main()
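As the module docstring notes, training itself is launched through Dora (main is the hydra_main entry point wired to the team's dora directory), while get_solver_from_sig is the hook for pulling a finished run back into a notebook. A minimal sketch follows; the signature string is a placeholder rather than a real experiment:

# Sketch only: recovering the solver of a finished XP from a notebook.
# "abc123de" is a hypothetical Dora signature, not a real run.
from audiocraft.train import get_solver_from_sig

solver = get_solver_from_sig(
    "abc123de",                        # hypothetical XP signature under the dora dir
    restore=True,                      # reload checkpoint state
    load_best=True,                    # required when disable_fsdp=True (the default)
    ignore_state_keys=["optimizer"],   # skip solver-specific state, as the docstring advises
)
# solver now holds the restored config and weights for interactive inspection.

From the command line, this module is normally driven through the Dora CLI (dora run with the desired solver overrides) rather than invoked directly; see the Dora link in the module docstring.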