File size: 600 Bytes
574a515 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
import torch
import soundfile as sf
from transformers import AutoConfig
from modeling_xcodec2 import XCodec2Model
# Checkpoint location handed to from_pretrained. The original note describes
# this as "your repository name on Hugging Face"; the value here is a local path.
model_path = "/data/zheny/xcodec2"
model = XCodec2Model.from_pretrained(model_path)
# Switch to evaluation mode and move the model to the GPU (requires CUDA).
model.eval().cuda()

# Prepare an audio clip.
wav, sr = sf.read("test.flac")
if wav.ndim > 1:
    # soundfile yields a (frames, channels) array for multi-channel files;
    # average the channels so the tensor below really is [1, time].
    wav = wav.mean(axis=1)
wav_tensor = torch.from_numpy(wav).float().unsqueeze(0)  # [1, time]

# Gradients are not needed for an encode/decode round trip.
with torch.no_grad():
    vq_code = model.encode_code(input_waveform=wav_tensor)
    print(vq_code)
    recon_wav = model.decode_code(vq_code).cpu()

# recon_wav is indexed with three axes (presumably [batch, channel, time] --
# TODO confirm); the first waveform is written at the input's sample rate.
sf.write("reconstructed.wav", recon_wav[0, 0, :].numpy(), sr)
|