File size: 3,093 Bytes
f32cd36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import warnings
from pathlib import Path

import argbind
import numpy as np
import torch
from audiotools import AudioSignal
from tqdm import tqdm

from dac import DACFile
from dac.utils import load_model

warnings.filterwarnings("ignore", category=UserWarning)


@argbind.bind(group="decode", positional=True, without_prefix=True)
@torch.inference_mode()
@torch.no_grad()
def decode(

    input: str,

    output: str = "",

    weights_path: str = "",

    model_tag: str = "latest",

    model_bitrate: str = "8kbps",

    device: str = "cuda",

    model_type: str = "44khz",

    verbose: bool = False,

):
    """Decode audio from codes.



    Parameters

    ----------

    input : str

        Path to input directory or file

    output : str, optional

        Path to output directory, by default "".

        If `input` is a directory, the directory sub-tree relative to `input` is re-created in `output`.

    weights_path : str, optional

        Path to weights file, by default "". If not specified, the weights file will be downloaded from the internet using the

        model_tag and model_type.

    model_tag : str, optional

        Tag of the model to use, by default "latest". Ignored if `weights_path` is specified.

    model_bitrate: str

        Bitrate of the model. Must be one of "8kbps", or "16kbps". Defaults to "8kbps".

    device : str, optional

        Device to use, by default "cuda". If "cpu", the model will be loaded on the CPU.

    model_type : str, optional

        The type of model to use. Must be one of "44khz", "24khz", or "16khz". Defaults to "44khz". Ignored if `weights_path` is specified.

    """
    generator = load_model(
        model_type=model_type,
        model_bitrate=model_bitrate,
        tag=model_tag,
        load_path=weights_path,
    )
    generator.to(device)
    generator.eval()

    # Find all .dac files in input directory
    _input = Path(input)
    input_files = list(_input.glob("**/*.dac"))

    # If input is a .dac file, add it to the list
    if _input.suffix == ".dac":
        input_files.append(_input)

    # Create output directory
    output = Path(output)
    output.mkdir(parents=True, exist_ok=True)

    for i in tqdm(range(len(input_files)), desc=f"Decoding files"):
        # Load file
        artifact = DACFile.load(input_files[i])

        # Reconstruct audio from codes
        recons = generator.decompress(artifact, verbose=verbose)

        # Compute output path
        relative_path = input_files[i].relative_to(input)
        output_dir = output / relative_path.parent
        if not relative_path.name:
            output_dir = output
            relative_path = input_files[i]
        output_name = relative_path.with_suffix(".wav").name
        output_path = output_dir / output_name
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Write to file
        recons.write(output_path)


if __name__ == "__main__":
    args = argbind.parse_args()
    with argbind.scope(args):
        decode()