# File size: 600 Bytes
# 574a515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import torch
import soundfile as sf
from transformers import AutoConfig

from  modeling_xcodec2 import XCodec2Model

# Round-trip demo: encode an audio file into codec tokens with XCodec2Model,
# decode the tokens back into a waveform, and write the result to disk.

# Location of the pretrained model passed to from_pretrained (here a local
# directory; the original note says a Hugging Face repository name also works).
model_path = "/data/zheny/xcodec2"

# Run on the GPU when one is available; otherwise fall back to the CPU instead
# of crashing inside .cuda().
device = "cuda" if torch.cuda.is_available() else "cpu"

model = XCodec2Model.from_pretrained(model_path)
model.eval().to(device)

# Load the audio clip to encode.
wav, sr = sf.read("test.flac")
if wav.ndim > 1:
    # soundfile returns a (frames, channels) array for multi-channel files;
    # average the channels so the tensor below really is [1, time].
    wav = wav.mean(axis=1)
wav_tensor = torch.from_numpy(wav).float().unsqueeze(0)  # [1, time]

# NOTE(review): wav_tensor is left on the CPU, exactly as in the original;
# presumably encode_code moves it to the model's device - confirm.
# NOTE(review): the output is written at the input rate `sr`; this assumes the
# codec reconstructs audio at the same sample rate - confirm.
with torch.no_grad():
    vq_code = model.encode_code(input_waveform=wav_tensor)
    print(vq_code)
    recon_wav = model.decode_code(vq_code).cpu()

# The indexing treats recon_wav as 3-D and takes the first item along the two
# leading axes, leaving a 1-D waveform to write.
sf.write("reconstructed.wav", recon_wav[0, 0, :].numpy(), sr)