"""Round-trip one audio file through XCodec2: encode to codes, decode, save."""

import torch
import soundfile as sf
from transformers import AutoConfig  # NOTE(review): not referenced in the visible script

from modeling_xcodec2 import XCodec2Model

model_path = "/data/zheny/xcodec2"

# Prefer the GPU, but fall back to CPU instead of crashing on hosts
# where CUDA is unavailable (the original called .cuda() unconditionally).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = XCodec2Model.from_pretrained(model_path)
model.eval().to(device)

# sf.read returns (samples, sample_rate); samples are float64 by default.
# NOTE(review): assumes test.flac is mono. soundfile returns a
# (frames, channels) array for multichannel files, which would make the
# tensor below 3-D instead of 2-D - confirm against the expected input.
wav, sr = sf.read("test.flac")

# Cast to float32 and add a leading dimension in front of the samples.
wav_tensor = torch.from_numpy(wav).float().unsqueeze(0)

# Inference only: no autograd bookkeeping is needed.
with torch.no_grad():
    vq_code = model.encode_code(input_waveform=wav_tensor)
    print(vq_code)

    # Bring the reconstruction back to host memory for writing to disk.
    recon_wav = model.decode_code(vq_code).cpu()

# The decoded tensor is indexed as 3-D; [0, 0, :] selects a single 1-D signal.
# NOTE(review): the output is written at the *input* sample rate; this
# presumes the codec's output rate matches the input file's - confirm.
sf.write("reconstructed.wav", recon_wav[0, 0, :].numpy(), sr)
|