File size: 1,422 Bytes
73baeae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import argparse
from pathlib import Path

import numpy as np
import torch
import torchaudio
from encodec import EncodecModel
from encodec.utils import convert_audio
from tqdm import tqdm

# Script entry point: encode every .wav file found (recursively) under
# --data_path with the 24 kHz EnCodec model and save the resulting codes as
# .npy files under --save_path, mirroring the input directory layout.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_path", type=str, required=True, help="Path of the original wav files"
    )
    parser.add_argument(
        "--save_path", type=str, required=True, help="Path to save encodec .npy files"
    )
    args = parser.parse_args()

    # Run on the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = EncodecModel.encodec_model_24khz()
    # NOTE(review): 12.0 is presumably in kbps per the EnCodec docs — confirm.
    model.set_target_bandwidth(12.0)
    model = model.to(device)

    data_path = Path(args.data_path)
    save_path = Path(args.save_path)

    # Materialize the recursive glob up front: tqdm can then display a total,
    # and sorting makes the processing order deterministic across runs.
    wav_paths = sorted(data_path.glob("**/*.wav"))

    # Inference only, so gradient tracking is disabled for the whole loop.
    with torch.no_grad():
        for wav_path in tqdm(wav_paths):
            wav, sr = torchaudio.load(wav_path)
            # Resample / remix to the sample rate and channel count of the model.
            wav = convert_audio(wav, sr, model.sample_rate, model.channels)
            # Add a leading batch dimension before moving to the device.
            wav = wav.unsqueeze(0).to(device)
            encoded_frames = model.encode(wav)

            # Each encoded frame is a (codes, <ignored>) pair; join the codes
            # of all frames along the last axis.
            # NOTE(review): codes are presumably (batch, n_codebooks, time),
            # which would make the saved array (time, n_codebooks) — confirm.
            codes = torch.cat([codebook for codebook, _ in encoded_frames], dim=-1)
            codes = codes.cpu().squeeze(0).transpose(-1, -2).numpy()

            # Mirror the input's relative path under save_path with a .npy suffix.
            out_path = save_path / wav_path.with_suffix(".npy").relative_to(data_path)
            # parents=True is required: the glob is recursive, so the target
            # directory may be several levels deep (and save_path itself may
            # not exist yet). Without it mkdir raises FileNotFoundError.
            out_path.parent.mkdir(parents=True, exist_ok=True)
            np.save(out_path, codes)