unpairedelectron07 committed on
Commit 797349c · 1 Parent(s): 4eb202f

Upload 11 files

audiocraft/utils/autocast.py ADDED
@@ -0,0 +1,40 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import torch
+
+
+ class TorchAutocast:
+     """TorchAutocast utility class.
+     Allows you to enable and disable autocast. This is especially useful
+     when dealing with different architectures and clusters with different
+     levels of support.
+
+     Args:
+         enabled (bool): Whether to enable torch.autocast or not.
+         args: Additional args for torch.autocast.
+         kwargs: Additional kwargs for torch.autocast.
+     """
+     def __init__(self, enabled: bool, *args, **kwargs):
+         self.autocast = torch.autocast(*args, **kwargs) if enabled else None
+
+     def __enter__(self):
+         if self.autocast is None:
+             return
+         try:
+             self.autocast.__enter__()
+         except RuntimeError:
+             device = self.autocast.device
+             dtype = self.autocast.fast_dtype
+             raise RuntimeError(
+                 f"There was an error autocasting with dtype={dtype} device={device}\n"
+                 "If you are on the FAIR Cluster, you might need to use autocast_dtype=float16"
+             )
+
+     def __exit__(self, *args, **kwargs):
+         if self.autocast is None:
+             return
+         self.autocast.__exit__(*args, **kwargs)
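Usage sketch (editor's example, not part of the committed files; it assumes a CUDA device and a throwaway placeholder model):

import torch
from audiocraft.utils.autocast import TorchAutocast

model = torch.nn.Linear(8, 8).cuda()
x = torch.randn(4, 8, device='cuda')
# Enabled: behaves like torch.autocast; disabled: a transparent no-op context manager.
autocast = TorchAutocast(enabled=True, device_type='cuda', dtype=torch.float16)
with autocast:
    y = model(x)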
audiocraft/utils/best_state.py ADDED
@@ -0,0 +1,81 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from collections import defaultdict
+ import logging
+ import typing as tp
+
+ import flashy
+ import torch
+
+ from ..optim import ModuleDictEMA
+ from .utils import copy_state
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class BestStateDictManager(flashy.state.StateDictSource):
+     """BestStateDictManager maintains a copy of the best state_dict() for registered sources.
+
+     BestStateDictManager has two main attributes:
+         states (dict): State dict of the registered StateDictSource.
+         param_ids (dict): Dict of parameter ids for registered states from ModuleDictEMA and other sources.
+
+     When registering new sources, the BestStateDictManager will ensure two conflicting sources between
+     ModuleDictEMA and original modules are not both registered, as it would otherwise create ambiguity about
+     what to consider for the best state.
+
+     Args:
+         device (torch.device or str): Device on which we keep the copy.
+         dtype (torch.dtype): Data type for the state parameters.
+     """
+     def __init__(self, device: tp.Union[torch.device, str] = 'cpu',
+                  dtype: tp.Optional[torch.dtype] = None):
+         self.device = device
+         self.states: dict = {}
+         self.param_ids: dict = defaultdict(dict)
+         self.dtype = dtype
+
+     def _get_parameter_ids(self, state_dict):
+         return {id(p): name for name, p in state_dict.items() if isinstance(p, torch.Tensor)}
+
+     def _validate_no_parameter_ids_overlap(self, name: str, param_ids: dict):
+         for registered_name, registered_param_ids in self.param_ids.items():
+             if registered_name != name:
+                 overlap = set(registered_param_ids.keys()) & set(param_ids.keys())
+                 assert len(overlap) == 0, (
+                     f"Found {len(overlap)} / {len(param_ids.keys())} overlapping parameters"
+                     f" in {name} and already registered {registered_name}: {' '.join(map(str, overlap))}")
+
+     def update(self, name: str, source: flashy.state.StateDictSource):
+         if name not in self.states:
+             raise ValueError(f"{name} missing from registered states.")
+         self.states[name] = copy_state(source.state_dict(), device=self.device, dtype=self.dtype)
+
+     def register(self, name: str, source: flashy.state.StateDictSource):
+         if name in self.states:
+             raise ValueError(f"{name} already present in states.")
+         # Registering parameter ids for EMA and non-EMA states allows us to check that
+         # there is no overlap that would create ambiguity about how to handle the best state
+         param_ids = self._get_parameter_ids(source.state_dict())
+         if isinstance(source, ModuleDictEMA):
+             logger.debug(f"Registering to best state: ModuleDictEMA '{name}' with {len(param_ids)} params")
+             self._validate_no_parameter_ids_overlap(name, param_ids)
+             self.param_ids[name] = param_ids
+         else:
+             logger.debug(f"Registering to best state: StateDictSource '{name}' with {len(param_ids)} params")
+             self._validate_no_parameter_ids_overlap('base', param_ids)
+             self.param_ids['base'].update(param_ids)
+         # Register state
+         self.states[name] = copy_state(source.state_dict(), device=self.device, dtype=self.dtype)
+
+     def state_dict(self) -> flashy.state.StateDict:
+         return self.states
+
+     def load_state_dict(self, state: flashy.state.StateDict):
+         for name, sub_state in state.items():
+             for k, v in sub_state.items():
+                 self.states[name][k].copy_(v)
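Usage sketch (editor's example, not from the commit). Any object exposing state_dict()/load_state_dict() can be registered for the non-EMA path; the name 'model' and the tiny module below are placeholders:

import torch
from audiocraft.utils.best_state import BestStateDictManager

model = torch.nn.Linear(4, 4)
best = BestStateDictManager(device='cpu')
best.register('model', model)   # keeps a copy of the current weights on CPU
# ... later, whenever a new best metric is reached:
best.update('model', model)     # refresh the stored best state
ckpt = best.state_dict()        # serializable alongside the rest of the solver state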
audiocraft/utils/cache.py ADDED
@@ -0,0 +1,324 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from concurrent.futures import ThreadPoolExecutor
+ from collections import deque
+ from functools import partial
+ from hashlib import sha1
+ import logging
+ from pathlib import Path
+ import sys
+ import typing as tp
+ import zipfile
+
+ import flashy
+ import torch
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def get_full_embed(full_embed: torch.Tensor, x: tp.Any, idx: int, device: tp.Union[str, torch.device]) -> torch.Tensor:
+     """Utility function for the EmbeddingCache, returning the full embedding without any chunking.
+     This method can be used in case there is no need to extract a chunk of the full embedding
+     read from the cache.
+
+     Args:
+         full_embed (torch.Tensor): The full embedding.
+         x (any): Batch object from which the full embedding is derived.
+         idx (int): Index of the object to consider in the batch object.
+     Returns:
+         full_embed (torch.Tensor): The full embedding.
+     """
+     return full_embed.to(device)
+
+
+ class EmbeddingCache:
+     """Cache around embeddings computation for faster execution.
+     The EmbeddingCache stores pre-computed embeddings on disk and provides a simple API
+     to retrieve the pre-computed embeddings on full inputs and extract only a given chunk
+     using a user-provided function. When the cache is warm (all embeddings are pre-computed),
+     the EmbeddingCache allows for faster training as it removes the need to compute the embeddings.
+     Additionally, it provides an in-memory cache around the loaded embeddings to limit the IO footprint
+     and synchronization points in the forward calls.
+
+     Args:
+         cache_path (Path): Path to the folder where all pre-computed embeddings are saved on disk.
+         device (str or torch.device): Device on which the embedding is returned.
+         compute_embed_fn (callable[[Path, any, int], torch.Tensor]): Function to compute
+             the embedding from a given object and path. This user-provided function can compute the
+             embedding from the provided object or using the provided path as entry point. The last parameter
+             specifies the index corresponding to the current embedding in the object that can represent batch metadata.
+         extract_embed_fn (callable[[torch.Tensor, any, int], torch.Tensor], optional): Function to extract
+             the desired embedding chunk from the full embedding loaded from the cache. The last parameter
+             specifies the index corresponding to the current embedding in the object that can represent batch metadata.
+             If not specified, will return the full embedding unmodified.
+     """
+     def __init__(self, cache_path: tp.Union[str, Path], device: tp.Union[str, torch.device],
+                  compute_embed_fn: tp.Callable[[Path, tp.Any, int], torch.Tensor],
+                  extract_embed_fn: tp.Optional[tp.Callable[[torch.Tensor, tp.Any, int], torch.Tensor]] = None):
+         self.cache_path = Path(cache_path)
+         self.device = device
+         self._compute_embed_fn = compute_embed_fn
+         self._extract_embed_fn: tp.Callable[[torch.Tensor, tp.Any, int], torch.Tensor]
+         if extract_embed_fn is not None:
+             self._extract_embed_fn = extract_embed_fn
+         else:
+             self._extract_embed_fn = partial(get_full_embed, device=device)
+         if self.cache_path is not None:
+             self.cache_path.mkdir(exist_ok=True, parents=True)
+             logger.info(f"Cache instantiated at: {self.cache_path}")
+             self.pool = ThreadPoolExecutor(8)
+             self.pool.__enter__()
+         self._current_batch_cache: dict = {}
+         self._memory_cache: dict = {}
+
+     def _get_cache_path(self, path: tp.Union[Path, str]):
+         """Get cache path for the given file path."""
+         sig = sha1(str(path).encode()).hexdigest()
+         return self.cache_path / sig
+
+     @staticmethod
+     def _get_full_embed_from_cache(cache: Path):
+         """Loads full pre-computed embedding from the cache."""
+         try:
+             embed = torch.load(cache, 'cpu')
+         except Exception as exc:
+             logger.error("Error loading %s: %r", cache, exc)
+             embed = None
+         return embed
+
+     def get_embed_from_cache(self, paths: tp.List[Path], x: tp.Any) -> torch.Tensor:
+         """Get embedding from cache, computing and storing it to cache if not already cached.
+         The EmbeddingCache first tries to load the embedding from the in-memory cache
+         containing the pre-computed chunks populated through `populate_embed_cache`.
+         If not found, the full embedding is computed and stored on disk to be later accessed
+         to populate the in-memory cache, and the desired embedding chunk is extracted and returned.
+
+         Args:
+             paths (list[Path or str]): List of paths from where the embeddings can be loaded.
+             x (any): Object from which the embedding is extracted.
+         """
+         embeds = []
+         for idx, path in enumerate(paths):
+             cache = self._get_cache_path(path)
+             if cache in self._current_batch_cache:
+                 embed = self._current_batch_cache[cache]
+             else:
+                 full_embed = self._compute_embed_fn(path, x, idx)
+                 try:
+                     with flashy.utils.write_and_rename(cache, pid=True) as f:
+                         torch.save(full_embed.cpu(), f)
+                 except Exception as exc:
+                     logger.error('Error saving embed %s (%s): %r', cache, full_embed.shape, exc)
+                 else:
+                     logger.info('New embed cache saved: %s (%s)', cache, full_embed.shape)
+                 embed = self._extract_embed_fn(full_embed, x, idx)
+             embeds.append(embed)
+         embed = torch.stack(embeds, dim=0)
+         return embed
+
+     def populate_embed_cache(self, paths: tp.List[Path], x: tp.Any) -> None:
+         """Populate in-memory caches for embeddings reading from the embeddings stored on disk.
+         The in-memory caches consist of a cache for the full embedding and another cache for the
+         final embedding chunk. Such caches are used to limit the IO access when computing the actual embeddings
+         and reduce the IO footprint and synchronization points during forward passes.
+
+         Args:
+             paths (list[Path]): List of paths from where the embeddings can be loaded.
+             x (any): Object from which the embedding is extracted.
+         """
+         self._current_batch_cache.clear()
+         if self.cache_path is not None:
+             futures: list = []
+             for path in paths:
+                 assert path is not None, "Path is required for computation from cache"
+                 cache = self._get_cache_path(path)
+                 if cache in self._memory_cache or not cache.exists():
+                     futures.append(None)
+                 else:
+                     futures.append(self.pool.submit(EmbeddingCache._get_full_embed_from_cache, cache))
+             for idx, (path, future) in enumerate(zip(paths, futures)):
+                 assert path is not None
+                 cache = self._get_cache_path(path)
+                 full_embed = None
+                 if future is None:
+                     if cache in self._memory_cache:
+                         full_embed = self._memory_cache[cache]
+                 else:
+                     full_embed = future.result()
+                     if full_embed is not None:
+                         self._memory_cache[cache] = full_embed
+                         full_embed = full_embed.to(self.device)
+                 if full_embed is not None:
+                     embed = self._extract_embed_fn(full_embed, x, idx)
+                     self._current_batch_cache[cache] = embed
+
+
+ class CachedBatchWriter:
+     """Write pre-computed caches for mini-batches. This can
+     make loading a lot more efficient depending on your filesystem.
+
+     Args:
+         cache_folder (Path): folder in which the cached minibatches
+             will be stored.
+
+     Inside the cache folder, the structure is the following:
+     `epoch_number / update_number.zip`
+     And the zip file contains one entry per batch item.
+
+     It is possible to use the cache with a batch size smaller than the one
+     it was created with, but obviously not larger. Make sure to call the
+     `start_epoch(epoch)` method to indicate changes of epochs.
+
+     See the grid `audiocraft/grids/musicgen/musicgen_warmup_cache.py`
+     for an example of how to warm up the cache.
+     """
+     def __init__(self, cache_folder: Path):
+         self.cache_folder = cache_folder
+         self._current_epoch: tp.Optional[int] = None
+         self._current_index = 0
+
+     def start_epoch(self, epoch: int):
+         """Call at the beginning of each epoch."""
+         self._current_epoch = epoch
+         self._current_index = 0
+         self._zip_path.parent.mkdir(exist_ok=True, parents=True)
+
+     @staticmethod
+     def _get_zip_path(cache_folder: Path, epoch: int, index: int):
+         return cache_folder / f"{epoch:05d}" / f"{index:06d}.zip"
+
+     @property
+     def _zip_path(self):
+         assert self._current_epoch is not None
+         return CachedBatchWriter._get_zip_path(self.cache_folder, self._current_epoch, self._current_index)
+
+     def save(self, *content):
+         """Save one mini-batch. This function is distributed-aware
+         and will automatically merge all the items from the different
+         workers.
+         """
+         all_contents = []
+         for rank in range(flashy.distrib.world_size()):
+             their_content = flashy.distrib.broadcast_object(content, src=rank)
+             all_contents.append(their_content)
+
+         if flashy.distrib.is_rank_zero():
+             idx = 0
+             with flashy.utils.write_and_rename(self._zip_path) as tmp:
+                 with zipfile.ZipFile(tmp, 'w') as zf:
+                     for content in all_contents:
+                         for vals in zip(*content):
+                             with zf.open(f'{idx}', 'w') as f:  # type: ignore
+                                 torch.save(vals, f)
+                             idx += 1
+         flashy.distrib.barrier()
+         self._current_index += 1
+
+
+ class CachedBatchLoader:
+     """Loader for cached mini-batches dumped with `CachedBatchWriter`.
+
+     Args:
+         cache_folder (Path): folder in which the cached minibatches are stored.
+         batch_size (int): batch size (per GPU) expected.
+         num_workers (int): number of workers to use for loading.
+         min_length (int): minimum expected length for each epoch. If some
+             mini-batches are missing, an error is raised.
+
+     This is iterable just like a regular DataLoader.
+     """
+
+     def __init__(self, cache_folder: Path, batch_size: int,
+                  num_workers: int = 10, min_length: int = 1):
+         self.cache_folder = cache_folder
+         self.batch_size = batch_size
+         self.num_workers = num_workers
+         self.min_length = min_length
+         self._current_epoch: tp.Optional[int] = None
+         self.sampler = None  # for compatibility with the regular DataLoader
+
+     def __len__(self):
+         path = CachedBatchWriter._get_zip_path(self.cache_folder, self._current_epoch or 0, 0).parent
+         return len([p for p in path.iterdir() if p.suffix == ".zip"])
+
+     def start_epoch(self, epoch: int):
+         """Call at the beginning of each epoch."""
+         self._current_epoch = epoch
+
+     def _zip_path(self, index: int):
+         assert self._current_epoch is not None
+         return CachedBatchWriter._get_zip_path(self.cache_folder, self._current_epoch, index)
+
+     def _load_one(self, index: int):
+         zip_path = self._zip_path(index)
+         if not zip_path.exists():
+             if index < self.min_length:
+                 raise RuntimeError(f"Cache should have at least {self.min_length} batches, but {index} doesn't exist")
+
+             return None
+         mode = "rb" if sys.version_info >= (3, 9) else "r"
+         try:
+             with zipfile.ZipFile(zip_path, 'r') as zf:
+                 rank = flashy.distrib.rank()
+                 world_size = flashy.distrib.world_size()
+                 root = zipfile.Path(zf)
+                 items = list(root.iterdir())
+                 total_batch_size = self.batch_size * world_size
+                 if len(items) < total_batch_size:
+                     raise RuntimeError(
+                         f"The cache can handle a max batch size of {len(items)}, "
+                         f"but {total_batch_size} is needed.")
+                 start = rank * self.batch_size
+                 items = items[start: start + self.batch_size]
+                 assert len(items) == self.batch_size
+                 entries = [torch.load(item.open(mode), 'cpu') for item in items]  # type: ignore
+                 transposed = zip(*entries)
+                 out = []
+                 for part in transposed:
+                     assert len(part) > 0
+                     if isinstance(part[0], torch.Tensor):
+                         out.append(torch.stack(part))
+                     else:
+                         assert isinstance(part, tuple)
+                         out.append(part)
+                 return out
+         except Exception:
+             logger.error("Error when reading zip path %s", zip_path)
+             raise
+
+     def __iter__(self):
+         """This will yield tuples, exactly as provided to the
+         `CachedBatchWriter.save` method.
+         """
+         pool = ThreadPoolExecutor(self.num_workers)
+         next_index = 0
+         queue = deque()
+
+         def _get_next():
+             nonlocal next_index
+             r = queue.popleft().result()
+             if r is None:
+                 return None
+             else:
+                 queue.append(pool.submit(self._load_one, next_index))
+                 next_index += 1
+             return r
+
+         with pool:
+             # fill the buffer of fetching jobs.
+             for _ in range(2 * self.num_workers):
+                 queue.append(pool.submit(self._load_one, next_index))
+                 next_index += 1
+             while True:
+                 batch = _get_next()
+                 if batch is None:
+                     return
+                 yield batch
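Usage sketch for EmbeddingCache (editor's example, not from the commit). The cache directory, file names and compute_fn below are hypothetical placeholders; the chunk-extraction function is left as the default, which returns the full embedding:

from pathlib import Path
import torch
from audiocraft.utils.cache import EmbeddingCache

def compute_fn(path: Path, x, idx: int) -> torch.Tensor:
    # Hypothetical stand-in for an expensive encoder call on the idx-th batch item.
    return torch.randn(16)

cache = EmbeddingCache(cache_path='/tmp/embed_cache', device='cpu', compute_embed_fn=compute_fn)
paths = [Path('item_a.wav'), Path('item_b.wav')]
batch = None  # whatever batch object the compute/extract functions expect
cache.populate_embed_cache(paths, batch)           # warm the in-memory cache from disk (no-op when cold)
embeds = cache.get_embed_from_cache(paths, batch)  # compute, persist, then return stacked embeddings
print(embeds.shape)                                # torch.Size([2, 16])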
audiocraft/utils/checkpoint.py ADDED
@@ -0,0 +1,161 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from enum import Enum
+ import logging
+ from pathlib import Path
+ import re
+ import typing as tp
+
+ import flashy
+ import torch
+
+ from ..environment import AudioCraftEnvironment
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class CheckpointSource(Enum):
+     CURRENT_XP = "current_xp"
+     PRETRAINED = "pretrained"
+     OTHER = "other"
+
+
+ def checkpoint_name(name: tp.Optional[str] = None, rank: tp.Optional[int] = None, use_fsdp: bool = False) -> str:
+     """Checkpoint name used throughout the AudioCraft codebase, with the following format:
+     `checkpoint_<name>.th(.<rank>)`. By convention, name is expected to be empty for the last checkpoint,
+     'best' for the best checkpoint, or the epoch number.
+
+     Args:
+         name (str, optional): Name suffix for the checkpoint file stem.
+         rank (int, optional): Rank for distributed processing, retrieved with flashy if not provided.
+         use_fsdp (bool): Whether the calling solver relies on FSDP.
+     Returns:
+         str: The checkpoint name.
+     """
+     suffix = ''
+     if rank is None:
+         rank = flashy.distrib.rank()
+     if rank > 0 and use_fsdp:
+         suffix = '.' + str(rank)
+     name_part = ''
+     if name is not None:
+         name_part = f'_{name}'
+     return f'checkpoint{name_part}.th{suffix}'
+
+
+ def is_sharded_checkpoint(path: Path) -> bool:
+     """Whether the checkpoint at the given path corresponds to a sharded checkpoint across ranks."""
+     return re.search(r'\.th\.\d+$', path.name) is not None
+
+
+ def resolve_checkpoint_path(sig_or_path: tp.Union[Path, str], name: tp.Optional[str] = None,
+                             use_fsdp: bool = False) -> tp.Optional[Path]:
+     """Resolve a given checkpoint path for a provided dora sig or path.
+
+     Args:
+         sig_or_path (Path or str): Checkpoint path or dora signature.
+         name (str, optional): Name suffix for the checkpoint file stem.
+         use_fsdp (bool): Whether the calling solver relies on FSDP.
+     Returns:
+         Path, optional: Resolved checkpoint path, if it exists.
+     """
+     from audiocraft import train
+     xps_root = train.main.dora.dir / 'xps'
+     sig_or_path = str(sig_or_path)
+     if sig_or_path.startswith('//sig/'):
+         sig = sig_or_path[len('//sig/'):]
+         path = xps_root / sig
+     else:
+         path = Path(sig_or_path)
+         path = AudioCraftEnvironment.resolve_reference_path(path)
+
+     if path.is_dir():
+         path = path / checkpoint_name(name, use_fsdp=use_fsdp)
+
+     if path.exists():
+         return path
+     else:
+         return None
+
+
+ def load_checkpoint(checkpoint_path: Path, is_sharded: bool = False) -> tp.Any:
+     """Load state from checkpoints at the specified checkpoint path."""
+     if is_sharded:
+         rank0_checkpoint_path = checkpoint_path.parent / checkpoint_name(use_fsdp=False)
+         if rank0_checkpoint_path.exists():
+             check_sharded_checkpoint(checkpoint_path, rank0_checkpoint_path)
+     state = torch.load(checkpoint_path, 'cpu')
+     logger.info("Checkpoint loaded from %s", checkpoint_path)
+     return state
+
+
+ def save_checkpoint(state: tp.Any, checkpoint_path: Path, is_sharded: bool = False) -> None:
+     """Save state to disk to the specified checkpoint_path."""
+     _safe_save_checkpoint(state, checkpoint_path, is_sharded)
+     logger.info("Checkpoint saved to %s", checkpoint_path)
+
+
+ def flush_stale_checkpoints(checkpoint_path: Path, keep_last: tp.Optional[int] = None) -> None:
+     """Flush checkpoints to only keep the last N checkpoints."""
+     if keep_last is None or keep_last <= 0:
+         return
+     checkpoint_dir = checkpoint_path.parent
+     suffix = ''
+     if flashy.distrib.rank() > 0:
+         suffix = f'.{flashy.distrib.rank()}'
+     checkpoint_files_with_epoch = []
+     for path in Path(checkpoint_dir).glob(f'checkpoint_*.th{suffix}'):
+         epoch_part = path.name.split('.', 1)[0].split('_', 1)[1]
+         if epoch_part.isdigit():
+             checkpoint_files_with_epoch.append((path, int(epoch_part)))
+     checkpoint_files = [path for path, _ in list(sorted(checkpoint_files_with_epoch, key=lambda t: t[1]))]
+     total_to_flush = max(0, len(checkpoint_files) - keep_last)
+     files_to_flush = checkpoint_files[:total_to_flush]
+     for path in files_to_flush:
+         logger.debug("Removing checkpoint: %s", str(path))
+         path.unlink(missing_ok=True)
+
+
+ def check_sharded_checkpoint(checkpoint_path: Path, rank0_checkpoint_path: Path) -> None:
+     """Check sharded checkpoint state, ensuring the checkpoints are not corrupted."""
+     # Finish the work of a previous run that got interrupted while dumping.
+     old_path = Path(str(checkpoint_path) + '.old')
+     if old_path.exists():
+         raise RuntimeError(
+             f"Old checkpoint {old_path} from previous version of this code exists, cannot safely proceed.")
+     token = Path(str(rank0_checkpoint_path) + '.tmp.done')
+     tmp_path = Path(str(checkpoint_path) + '.tmp')
+     if token.exists():
+         if tmp_path.exists():
+             tmp_path.rename(checkpoint_path)
+     flashy.distrib.barrier()
+     if flashy.distrib.is_rank_zero() and token.exists():
+         token.unlink()
+
+
+ def _safe_save_checkpoint(state: tp.Any, checkpoint_path: Path, is_sharded: bool = False) -> None:
+     """Save checkpoints in a safe manner, even when using sharded checkpoints across nodes."""
+     def _barrier_if_sharded():
+         if is_sharded:
+             flashy.distrib.barrier()
+
+     if flashy.distrib.is_rank_zero():
+         token = Path(str(checkpoint_path) + '.tmp.done')
+         if token.exists():
+             token.unlink()
+     _barrier_if_sharded()
+     with flashy.utils.write_and_rename(checkpoint_path) as f:
+         torch.save(state, f)
+     _barrier_if_sharded()
+     if flashy.distrib.is_rank_zero():
+         token.touch()
+     _barrier_if_sharded()
+     _barrier_if_sharded()
+     if flashy.distrib.rank() == 0:
+         token.unlink()
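Usage sketch (editor's example, not from the commit), run outside a distributed job so the rank defaults to 0; the directory is a placeholder:

from pathlib import Path
import torch
from audiocraft.utils import checkpoint

state = {'model': torch.nn.Linear(2, 2).state_dict(), 'epoch': 10}
ckpt_dir = Path('/tmp/my_xp')
ckpt_dir.mkdir(exist_ok=True, parents=True)
path = ckpt_dir / checkpoint.checkpoint_name('10')     # -> checkpoint_10.th
checkpoint.save_checkpoint(state, path)                # atomic write via write_and_rename
checkpoint.flush_stale_checkpoints(path, keep_last=2)  # keep only the 2 most recent epoch checkpoints
loaded = checkpoint.load_checkpoint(path)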
audiocraft/utils/cluster.py ADDED
@@ -0,0 +1,75 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ """
+ Utility functions for SLURM configuration and cluster settings.
+ """
+
+ from enum import Enum
+ import os
+ import socket
+ import typing as tp
+
+ import omegaconf
+
+
+ class ClusterType(Enum):
+     AWS = "aws"
+     FAIR = "fair"
+     RSC = "rsc"
+     LOCAL_DARWIN = "darwin"
+     DEFAULT = "default"  # used for any other cluster.
+
+
+ def _guess_cluster_type() -> ClusterType:
+     uname = os.uname()
+     fqdn = socket.getfqdn()
+     if uname.sysname == "Linux" and (uname.release.endswith("-aws") or ".ec2" in fqdn):
+         return ClusterType.AWS
+
+     if fqdn.endswith(".fair"):
+         return ClusterType.FAIR
+
+     if fqdn.endswith(".facebook.com"):
+         return ClusterType.RSC
+
+     if uname.sysname == "Darwin":
+         return ClusterType.LOCAL_DARWIN
+
+     return ClusterType.DEFAULT
+
+
+ def get_cluster_type(
+     cluster_type: tp.Optional[ClusterType] = None,
+ ) -> tp.Optional[ClusterType]:
+     if cluster_type is None:
+         return _guess_cluster_type()
+
+     return cluster_type
+
+
+ def get_slurm_parameters(
+     cfg: omegaconf.DictConfig, cluster_type: tp.Optional[ClusterType] = None
+ ) -> omegaconf.DictConfig:
+     """Update SLURM parameters in the configuration based on the cluster type.
+     If the cluster type is not specified, it is inferred automatically.
+     """
+     from ..environment import AudioCraftEnvironment
+     cluster_type = get_cluster_type(cluster_type)
+     # apply cluster-specific adjustments
+     if cluster_type == ClusterType.AWS:
+         cfg["mem_per_gpu"] = None
+         cfg["constraint"] = None
+         cfg["setup"] = []
+     elif cluster_type == ClusterType.RSC:
+         cfg["mem_per_gpu"] = None
+         cfg["setup"] = []
+         cfg["constraint"] = None
+         cfg["partition"] = "learn"
+     slurm_exclude = AudioCraftEnvironment.get_slurm_exclude()
+     if slurm_exclude is not None:
+         cfg["exclude"] = slurm_exclude
+     return cfg
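Usage sketch (editor's example, not from the commit), assuming the default AudioCraft environment resolves without extra setup; the SLURM keys mirror those touched above:

import omegaconf
from audiocraft.utils.cluster import ClusterType, get_slurm_parameters

slurm = omegaconf.OmegaConf.create({
    'mem_per_gpu': 40, 'constraint': 'volta32gb', 'setup': ['module load cuda'],
    'partition': None, 'exclude': None,
})
# Force a cluster type instead of relying on hostname/uname auto-detection.
slurm = get_slurm_parameters(slurm, cluster_type=ClusterType.RSC)
print(slurm.partition)  # 'learn'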
audiocraft/utils/deadlock.py ADDED
@@ -0,0 +1,58 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import logging
+ import os
+ from queue import Queue, Empty
+ import signal
+ import sys
+ import threading
+ import traceback
+
+ logger = logging.getLogger(__name__)
+
+
+ class DeadlockDetect:
+     def __init__(self, use: bool = False, timeout: float = 120.):
+         self.use = use
+         self.timeout = timeout
+         self._queue: Queue = Queue()
+
+     def update(self, stage: str):
+         if self.use:
+             self._queue.put(stage)
+
+     def __enter__(self):
+         if self.use:
+             self._thread = threading.Thread(target=self._detector_thread)
+             self._thread.start()
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         if self.use:
+             self._queue.put(None)
+             self._thread.join()
+
+     def _detector_thread(self):
+         logger.debug("Deadlock detector started")
+         last_stage = "init"
+         while True:
+             try:
+                 stage = self._queue.get(timeout=self.timeout)
+             except Empty:
+                 break
+             if stage is None:
+                 logger.debug("Exiting deadlock detector thread")
+                 return
+             else:
+                 last_stage = stage
+         logger.error("Deadlock detector timed out, last stage was %s", last_stage)
+         for th in threading.enumerate():
+             print(th, file=sys.stderr)
+             traceback.print_stack(sys._current_frames()[th.ident])
+             print(file=sys.stderr)
+         sys.stdout.flush()
+         sys.stderr.flush()
+         os.kill(os.getpid(), signal.SIGKILL)
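Usage sketch (editor's example, not from the commit). Note that on timeout the detector dumps all thread stacks and SIGKILLs the process:

from audiocraft.utils.deadlock import DeadlockDetect

detect = DeadlockDetect(use=True, timeout=120.)
with detect:
    for step in range(1000):
        detect.update('data_loading')
        # batch = next(loader)        # placeholders for the real training work
        detect.update('forward')
        # loss = model(batch)
        detect.update('backward')
        # loss.backward()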
audiocraft/utils/export.py ADDED
@@ -0,0 +1,79 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ """
+ Utility to export a training checkpoint to a lightweight release checkpoint.
+ """
+
+ from pathlib import Path
+ import typing as tp
+
+ from omegaconf import OmegaConf
+ import torch
+
+ from audiocraft import __version__
+
+
+ def export_encodec(checkpoint_path: tp.Union[Path, str], out_file: tp.Union[Path, str]):
+     """Export only the best state from the given EnCodec checkpoint. This
+     should be used if you trained your own EnCodec model.
+     """
+     pkg = torch.load(checkpoint_path, 'cpu')
+     new_pkg = {
+         'best_state': pkg['best_state']['model'],
+         'xp.cfg': OmegaConf.to_yaml(pkg['xp.cfg']),
+         'version': __version__,
+         'exported': True,
+     }
+     Path(out_file).parent.mkdir(exist_ok=True, parents=True)
+     torch.save(new_pkg, out_file)
+     return out_file
+
+
+ def export_pretrained_compression_model(pretrained_encodec: str, out_file: tp.Union[Path, str]):
+     """Export a compression model (potentially EnCodec) from a pretrained model.
+     This is required for packaging the audio tokenizer along with a MusicGen or AudioGen model.
+     Do not include the //pretrained/ prefix. For instance, if you trained a model
+     with `facebook/encodec_32khz`, just put that as a name. Same for `dac_44khz`.
+
+     In that case, this will not actually include a copy of the model, simply the reference
+     to the model used.
+     """
+     if Path(pretrained_encodec).exists():
+         pkg = torch.load(pretrained_encodec)
+         assert 'best_state' in pkg
+         assert 'xp.cfg' in pkg
+         assert 'version' in pkg
+         assert 'exported' in pkg
+     else:
+         pkg = {
+             'pretrained': pretrained_encodec,
+             'exported': True,
+             'version': __version__,
+         }
+     Path(out_file).parent.mkdir(exist_ok=True, parents=True)
+     torch.save(pkg, out_file)
+
+
+ def export_lm(checkpoint_path: tp.Union[Path, str], out_file: tp.Union[Path, str]):
+     """Export only the best state from the given MusicGen or AudioGen checkpoint.
+     """
+     pkg = torch.load(checkpoint_path, 'cpu')
+     if pkg['fsdp_best_state']:
+         best_state = pkg['fsdp_best_state']['model']
+     else:
+         assert pkg['best_state']
+         best_state = pkg['best_state']['model']
+     new_pkg = {
+         'best_state': best_state,
+         'xp.cfg': OmegaConf.to_yaml(pkg['xp.cfg']),
+         'version': __version__,
+         'exported': True,
+     }
+
+     Path(out_file).parent.mkdir(exist_ok=True, parents=True)
+     torch.save(new_pkg, out_file)
+     return out_file
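Usage sketch (editor's example, not from the commit). The checkpoint and output paths below are placeholders; the output file names follow the convention expected by the pretrained loaders:

from audiocraft.utils import export

# Package a trained MusicGen/AudioGen LM together with a reference to the audio tokenizer it used.
export.export_lm('/checkpoints/my_xp/checkpoint.th', '/release/my_model/state_dict.bin')
export.export_pretrained_compression_model('facebook/encodec_32khz',
                                            '/release/my_model/compression_state_dict.bin')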
audiocraft/utils/export_legacy.py ADDED
@@ -0,0 +1,70 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ """
+ Legacy functions used at the time of the first release, kept for reference.
+ """
+
+ from pathlib import Path
+ import typing as tp
+
+ from omegaconf import OmegaConf, DictConfig
+ import torch
+
+ from audiocraft import __version__
+
+
+ def _clean_lm_cfg(cfg: DictConfig):
+     OmegaConf.set_struct(cfg, False)
+     # This used to be set automatically in the LM solver, need a more robust solution
+     # for the future.
+     cfg['transformer_lm']['card'] = 2048
+     n_q = 4
+     stereo_cfg = getattr(cfg, 'interleave_stereo_codebooks', None)
+     if stereo_cfg is not None and stereo_cfg.use:
+         if 'downsample' in stereo_cfg:
+             del stereo_cfg['downsample']
+         n_q = 8
+     cfg['transformer_lm']['n_q'] = n_q
+     # Experimental params no longer supported.
+     bad_params = ['spectral_norm_attn_iters', 'spectral_norm_ff_iters',
+                   'residual_balancer_attn', 'residual_balancer_ff', 'layer_drop']
+     for name in bad_params:
+         del cfg['transformer_lm'][name]
+     OmegaConf.set_struct(cfg, True)
+     return cfg
+
+
+ def export_encodec(checkpoint_path: tp.Union[Path, str], out_file: tp.Union[Path, str]):
+     pkg = torch.load(checkpoint_path, 'cpu')
+     new_pkg = {
+         'best_state': pkg['ema']['state']['model'],
+         'xp.cfg': OmegaConf.to_yaml(pkg['xp.cfg']),
+         # The following params were NOT exported for the first release of MusicGen.
+         'version': __version__,
+         'exported': True,
+     }
+     Path(out_file).parent.mkdir(exist_ok=True, parents=True)
+     torch.save(new_pkg, out_file)
+     return out_file
+
+
+ def export_lm(checkpoint_path: tp.Union[Path, str], out_file: tp.Union[Path, str]):
+     pkg = torch.load(checkpoint_path, 'cpu')
+     if pkg['fsdp_best_state']:
+         best_state = pkg['fsdp_best_state']['model']
+     else:
+         best_state = pkg['best_state']['model']
+     new_pkg = {
+         'best_state': best_state,
+         'xp.cfg': OmegaConf.to_yaml(_clean_lm_cfg(pkg['xp.cfg'])),
+         # The following params were NOT exported for the first release of MusicGen.
+         'version': __version__,
+         'exported': True,
+     }
+     Path(out_file).parent.mkdir(exist_ok=True, parents=True)
+     torch.save(new_pkg, out_file)
+     return out_file
audiocraft/utils/notebook.py ADDED
@@ -0,0 +1,32 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ try:
+     import IPython.display as ipd  # type: ignore
+ except ImportError:
+     # Not in a notebook...
+     pass
+
+
+ import torch
+
+
+ def display_audio(samples: torch.Tensor, sample_rate: int):
+     """Renders an audio player for the given audio samples.
+
+     Args:
+         samples (torch.Tensor): a Tensor of decoded audio samples
+             with shapes [B, C, T] or [C, T]
+         sample_rate (int): sample rate the audio should be displayed with.
+     """
+     assert samples.dim() == 2 or samples.dim() == 3
+
+     samples = samples.detach().cpu()
+     if samples.dim() == 2:
+         samples = samples[None, ...]
+
+     for audio in samples:
+         ipd.display(ipd.Audio(audio, rate=sample_rate))
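Usage sketch (editor's example, not from the commit); only renders when executed inside IPython/Jupyter:

import torch
from audiocraft.utils.notebook import display_audio

wav = torch.zeros(1, 32000)            # [C, T]: one second of silence at 32 kHz (placeholder)
display_audio(wav, sample_rate=32000)  # shows an inline audio player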
audiocraft/utils/profiler.py ADDED
@@ -0,0 +1,38 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import logging
+ import typing as tp
+
+ import dora
+ import torch
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class Profiler:
+     """Context manager wrapper for the xformers profiler.
+     """
+     def __init__(self, module: torch.nn.Module, enabled: bool = False):
+         self.profiler: tp.Optional[tp.Any] = None
+         if enabled:
+             from xformers.profiler import profile
+             output_dir = dora.get_xp().folder / 'profiler_data'
+             logger.info("Profiling activated, results will be saved to %s", output_dir)
+             self.profiler = profile(output_dir=output_dir, module=module)
+
+     def step(self):
+         if self.profiler is not None:
+             self.profiler.step()  # type: ignore
+
+     def __enter__(self):
+         if self.profiler is not None:
+             return self.profiler.__enter__()  # type: ignore
+
+     def __exit__(self, exc_type, exc_value, exc_tb):
+         if self.profiler is not None:
+             return self.profiler.__exit__(exc_type, exc_value, exc_tb)  # type: ignore
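Usage sketch (editor's example, not from the commit). With enabled=False the wrapper is a no-op; enabled=True additionally requires xformers and running under a dora experiment:

import torch
from audiocraft.utils.profiler import Profiler

model = torch.nn.Linear(16, 16)
profiler = Profiler(module=model, enabled=False)
with profiler:
    for step in range(10):
        model(torch.randn(4, 16))
        profiler.step()   # advances the xformers profiler schedule when enabled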
audiocraft/utils/utils.py ADDED
@@ -0,0 +1,298 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from concurrent.futures import ProcessPoolExecutor
+ from contextlib import contextmanager
+ from functools import wraps, lru_cache
+ import hashlib
+ import json
+ import logging
+ from pathlib import Path
+ import typing as tp
+
+ import flashy
+ import flashy.distrib
+ import omegaconf
+ import torch
+ from torch.nn.utils.rnn import pad_sequence
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def model_hash(model: torch.nn.Module) -> str:
+     """Return a model hash. This should allow us to track regressions in model init
+     from the logs of past experiments.
+     """
+     hasher = hashlib.sha1()
+     for p in model.parameters():
+         hasher.update(p.data.cpu().numpy().tobytes())
+     return hasher.hexdigest()
+
+
+ def dict_from_config(cfg: omegaconf.DictConfig) -> dict:
+     """Convenience function to map an omegaconf configuration to a dictionary.
+
+     Args:
+         cfg (omegaconf.DictConfig): Original configuration to map to dict.
+     Returns:
+         dict: Config as dictionary object.
+     """
+     dct = omegaconf.OmegaConf.to_container(cfg, resolve=True)
+     assert isinstance(dct, dict)
+     return dct
+
+
+ def random_subset(dataset, max_samples: int, seed: int = 42) -> torch.utils.data.Subset:
+     if max_samples >= len(dataset):
+         return dataset
+
+     generator = torch.Generator().manual_seed(seed)
+     perm = torch.randperm(len(dataset), generator=generator)
+     return torch.utils.data.Subset(dataset, perm[:max_samples].tolist())
+
+
+ def get_loader(dataset, num_samples: tp.Optional[int], batch_size: int,
+                num_workers: int, seed: int, **kwargs) -> torch.utils.data.DataLoader:
+     """Convenience function to load a dataset into a dataloader with optional subset sampling.
+
+     Args:
+         dataset: Dataset to load.
+         num_samples (Optional[int]): Number of samples to limit subset size.
+         batch_size (int): Batch size.
+         num_workers (int): Number of workers for data loading.
+         seed (int): Random seed.
+     """
+     if num_samples is not None:
+         dataset = random_subset(dataset, num_samples, seed)
+
+     dataloader = flashy.distrib.loader(
+         dataset,
+         batch_size=batch_size,
+         num_workers=num_workers,
+         **kwargs
+     )
+     return dataloader
+
+
+ def get_dataset_from_loader(dataloader):
+     dataset = dataloader.dataset
+     if isinstance(dataset, torch.utils.data.Subset):
+         return dataset.dataset
+     else:
+         return dataset
+
+
+ def multinomial(input: torch.Tensor, num_samples: int, replacement=False, *, generator=None):
+     """torch.multinomial with arbitrary number of dimensions, and number of candidates on the last dimension.
+
+     Args:
+         input (torch.Tensor): The input tensor containing probabilities.
+         num_samples (int): Number of samples to draw.
+         replacement (bool): Whether to draw with replacement or not.
+     Keyword args:
+         generator (torch.Generator): A pseudorandom number generator for sampling.
+     Returns:
+         torch.Tensor: Last dimension contains num_samples indices
+             sampled from the multinomial probability distribution
+             located in the last dimension of tensor input.
+     """
+     input_ = input.reshape(-1, input.shape[-1])
+     output_ = torch.multinomial(input_, num_samples=num_samples, replacement=replacement, generator=generator)
+     output = output_.reshape(*list(input.shape[:-1]), -1)
+     return output
+
+
+ def sample_top_k(probs: torch.Tensor, k: int) -> torch.Tensor:
+     """Sample next token from top K values along the last dimension of the input probs tensor.
+
+     Args:
+         probs (torch.Tensor): Input probabilities with token candidates on the last dimension.
+         k (int): The k in “top-k”.
+     Returns:
+         torch.Tensor: Sampled tokens.
+     """
+     top_k_value, _ = torch.topk(probs, k, dim=-1)
+     min_value_top_k = top_k_value[..., [-1]]
+     probs *= (probs >= min_value_top_k).float()
+     probs.div_(probs.sum(dim=-1, keepdim=True))
+     next_token = multinomial(probs, num_samples=1)
+     return next_token
+
+
+ def sample_top_p(probs: torch.Tensor, p: float) -> torch.Tensor:
+     """Sample next token from top P probabilities along the last dimension of the input probs tensor.
+
+     Args:
+         probs (torch.Tensor): Input probabilities with token candidates on the last dimension.
+         p (float): The p in “top-p”.
+     Returns:
+         torch.Tensor: Sampled tokens.
+     """
+     probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True)
+     probs_sum = torch.cumsum(probs_sort, dim=-1)
+     mask = probs_sum - probs_sort > p
+     probs_sort *= (~mask).float()
+     probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True))
+     next_token = multinomial(probs_sort, num_samples=1)
+     next_token = torch.gather(probs_idx, -1, next_token)
+     return next_token
+
+
+ class DummyPoolExecutor:
+     """Dummy pool executor to use when we actually have only 1 worker.
+     (e.g. instead of ProcessPoolExecutor).
+     """
+     class DummyResult:
+         def __init__(self, func, *args, **kwargs):
+             self.func = func
+             self.args = args
+             self.kwargs = kwargs
+
+         def result(self):
+             return self.func(*self.args, **self.kwargs)
+
+     def __init__(self, workers, mp_context=None):
+         pass
+
+     def submit(self, func, *args, **kwargs):
+         return DummyPoolExecutor.DummyResult(func, *args, **kwargs)
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, exc_tb):
+         return
+
+
+ def get_pool_executor(num_workers: int, mp_context=None):
+     return ProcessPoolExecutor(num_workers, mp_context) if num_workers > 1 else DummyPoolExecutor(1)
+
+
+ def length_to_mask(lengths: torch.Tensor, max_len: tp.Optional[int] = None) -> torch.Tensor:
+     """Utility function to convert a tensor of sequence lengths to a mask (useful when working on padded sequences).
+     For example: [3, 5] => [[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]]
+
+     Args:
+         lengths (torch.Tensor): tensor with lengths
+         max_len (int): can set the max length manually. Defaults to None.
+     Returns:
+         torch.Tensor: mask with 0s where there are pad tokens else 1s
+     """
+     assert len(lengths.shape) == 1, "Length shape should be 1 dimensional."
+     final_length = lengths.max().item() if not max_len else max_len
+     final_length = max(final_length, 1)  # if all seqs are of len zero we don't want a zero-size tensor
+     return torch.arange(final_length, device=lengths.device)[None, :] < lengths[:, None]
+
+
+ def hash_trick(word: str, vocab_size: int) -> int:
+     """Hash trick to pair each word with an index.
+
+     Args:
+         word (str): word we wish to convert to an index
+         vocab_size (int): size of the vocabulary
+     Returns:
+         int: index of the word in the embedding LUT
+     """
+     hash = int(hashlib.sha256(word.encode("utf-8")).hexdigest(), 16)
+     return hash % vocab_size
+
+
+ def with_rank_rng(base_seed: int = 1234):
+     """Decorator for a function so that the function will use a Random Number Generator
+     whose state depends on the GPU rank. The original RNG state is restored upon returning.
+
+     Args:
+         base_seed (int): Random seed.
+     """
+     def _decorator(fun: tp.Callable):
+         @wraps(fun)
+         def _decorated(*args, **kwargs):
+             state = torch.get_rng_state()
+             seed = base_seed ^ flashy.distrib.rank()
+             torch.manual_seed(seed)
+             logger.debug('Rank dependent seed set to %d', seed)
+             try:
+                 return fun(*args, **kwargs)
+             finally:
+                 torch.set_rng_state(state)
+                 logger.debug('RNG state restored.')
+         return _decorated
+     return _decorator
+
+
+ def collate(tensors: tp.List[torch.Tensor], dim: int = 0) -> tp.Tuple[torch.Tensor, torch.Tensor]:
+     """Get a list of tensors and collate them into a single tensor, according to the following logic:
+     - `dim` specifies the time dimension which will be stacked and padded.
+     - The output will contain 1 new dimension (dimension index 0) which will be the size
+       of the original list.
+
+     Args:
+         tensors (tp.List[torch.Tensor]): List of tensors to collate.
+         dim (int): Dimension which will be stacked and padded.
+     Returns:
+         tp.Tuple[torch.Tensor, torch.Tensor]:
+             torch.Tensor: Stacked and padded tensor. The output will contain 1 new dimension
+                 (dimension index 0) which will be the size of the original list.
+             torch.Tensor: Tensor containing length of original tensor sizes (without padding).
+     """
+     tensors = [x.transpose(0, dim) for x in tensors]
+     lens = torch.LongTensor([len(x) for x in tensors])
+     padded_tensors = pad_sequence(tensors)
+     padded_tensors = padded_tensors.transpose(0, 1)
+     padded_tensors = padded_tensors.transpose(1, dim + 1)
+     return padded_tensors, lens
+
+
+ # TODO: Move to flashy?
+ def copy_state(state: tp.Any, device: tp.Union[torch.device, str] = 'cpu',
+                dtype: tp.Optional[torch.dtype] = None) -> tp.Any:
+     if isinstance(state, torch.Tensor):
+         if dtype is None or not state.is_floating_point():
+             dtype = state.dtype
+         return state.detach().to(device=device, dtype=dtype, copy=True)
+     elif isinstance(state, dict):
+         return {k: copy_state(v, device, dtype) for k, v in state.items()}
+     elif isinstance(state, list):
+         return [copy_state(v, device, dtype) for v in state]
+
+
+ # TODO: Move to flashy?
+ @contextmanager
+ def swap_state(model, state, **kwargs):
+     old_state = copy_state(model.state_dict())
+     model.load_state_dict(state, **kwargs)
+     try:
+         yield
+     finally:
+         model.load_state_dict(old_state)
+
+
+ @lru_cache(None)
+ def warn_once(logger, msg):
+     """Warn about a given message only once."""
+     logger.warning(msg)
+
+
+ def is_jsonable(x: tp.Any):
+     """Check if an object can be serialized into JSON."""
+     try:
+         json.dumps(x)
+         return True
+     except (TypeError, OverflowError):
+         return False
+
+
+ def load_clap_state_dict(clap_model, path: tp.Union[str, Path]):
+     """Wrapper around state dict loading of the CLAP model,
+     addressing compatibility issues between CLAP and the AudioCraft
+     HuggingFace transformers version.
+     See: https://github.com/LAION-AI/CLAP/issues/118
+     """
+     from clap_module.factory import load_state_dict  # type: ignore
+     pkg = load_state_dict(path)
+     pkg.pop('text_branch.embeddings.position_ids', None)
+     clap_model.model.load_state_dict(pkg)
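Usage sketch for a few of the helpers above (editor's example, not from the commit):

import torch
from audiocraft.utils.utils import collate, length_to_mask, sample_top_k

# Pad a ragged batch along the time dimension and keep the original lengths.
seqs = [torch.randn(3, 8), torch.randn(5, 8)]
padded, lens = collate(seqs, dim=0)   # padded: [2, 5, 8], lens: tensor([3, 5])
mask = length_to_mask(lens)           # [2, 5] boolean mask, False on padded steps

# Top-k sampling over a batch of token distributions (note: modifies probs in place).
probs = torch.softmax(torch.randn(2, 100), dim=-1)
tokens = sample_top_k(probs, k=10)    # shape [2, 1]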