|
import os
|
|
import torch
|
|
import librosa
|
|
import numpy as np
|
|
import soundfile as sf
|
|
import pandas as pd
|
|
|
|
from feats.contentvec_hf import get_content_model, get_content
|
|
|
|
|
|
from tqdm import tqdm
|
|
from multiprocessing import Process
|
|
import pyworld as pw
|
|
|
|
|
|
def _pad_or_trim(samples, target_len):
    """Cut ``samples`` to ``target_len`` and zero-pad the tail if it is shorter."""
    clipped = samples[:target_len]
    fitted = np.zeros(target_len, dtype=clipped.dtype)
    n_valid = clipped.shape[-1]
    fitted[:n_valid] = clipped[:n_valid]
    return fitted


def resample_save(infolder, audio_path, model,
                  audio_sr=24000, content_sr=16000, min_length=1.92,
                  content_resolution=50,
                  save_path='features'):
    """Write resampled audio and content features for one file.

    The file ``infolder + audio_path`` is loaded at ``content_sr``, trimmed to
    a whole number of ``content_sr / content_resolution``-sample frames and
    zero-padded up to at least ``min_length`` seconds.  From that signal the
    function stores:

    * ``<save_path>/content/<audio_path>.pt`` -- output of ``get_content``
    * ``<save_path>/audio_16k/<audio_path>`` -- the padded/trimmed signal

    The source is then loaded again at ``audio_sr``, fitted to the
    corresponding duration and written to ``<save_path>/audio_24k/<audio_path>``.
    The ``audio_16k`` / ``audio_24k`` directory names are fixed and do not
    follow the sample-rate arguments.

    Nothing is done when the ``audio_24k`` output already exists; it is the
    last file written, so it doubles as the completion marker.

    Args:
        infolder: Prefix concatenated directly with ``audio_path`` (no
            separator is inserted).
        audio_path: Path of the audio file relative to ``infolder``; also
            used as the relative path of every output.
        model: Content model handed to ``get_content``.
        audio_sr: Sample rate of the second (``audio_24k``) output.
        content_sr: Sample rate used for content extraction.
        min_length: Minimum output duration in seconds.
        content_resolution: Content frames per second; sets the frame size
            the signal length is floored to.
        save_path: Root directory for all outputs.
    """
    hi_rate_file = save_path + '/' + 'audio_24k/' + audio_path
    if os.path.isfile(hi_rate_file):
        return

    source_file = infolder + audio_path

    # --- content-rate pass -------------------------------------------------
    waveform, loaded_sr = librosa.load(source_file, sr=content_sr)
    frame_size = content_sr / content_resolution
    # Largest whole number of frames that fits in the recording (float value).
    final_length = waveform.shape[-1] // frame_size * frame_size

    n_content = max(round(min_length * content_sr), round(final_length))
    assert n_content % 10 == 0
    content_audio = _pad_or_trim(waveform, n_content)

    features = get_content(model, torch.tensor(content_audio).unsqueeze(0))
    features = features.cpu()
    content_file = save_path + '/' + 'content/' + audio_path
    os.makedirs(os.path.dirname(content_file), exist_ok=True)
    torch.save(features, content_file + '.pt')

    low_rate_file = save_path + '/' + 'audio_16k/' + audio_path
    os.makedirs(os.path.dirname(low_rate_file), exist_ok=True)
    sf.write(low_rate_file, content_audio, int(loaded_sr))

    # --- audio-rate pass ---------------------------------------------------
    waveform, loaded_sr = librosa.load(source_file, sr=audio_sr)
    # Same duration as the content-rate signal, expressed in audio_sr samples.
    n_audio = max(round(min_length * audio_sr),
                  round(final_length / content_sr * audio_sr))
    assert n_audio % 10 == 0
    hi_rate_audio = _pad_or_trim(waveform, n_audio)

    os.makedirs(os.path.dirname(hi_rate_file), exist_ok=True)
    sf.write(hi_rate_file, hi_rate_audio, int(loaded_sr))
|
|
|
|
|
|
def extract_f0(in_folder, audio_path, save_path):
    """Estimate F0 for one audio file and save it as a torch tensor.

    The result is written to ``<save_path>/f0/<audio_path>.pt``.  If that
    file already exists the function returns immediately, before the audio
    is read, so re-runs do not decode files that are already processed.

    Args:
        in_folder: Prefix concatenated directly with ``audio_path`` (no
            separator is inserted).
        audio_path: Path of the audio file relative to ``in_folder``; also
            used as the relative path of the output.
        save_path: Root directory under which the ``f0`` folder is created.

    Raises:
        AssertionError: If the file's native sample rate is not 16000 Hz.
    """
    f0_file = save_path + '/' + 'f0/' + audio_path + '.pt'
    # Check the cache first: loading the audio is wasted work when the
    # output is already on disk.
    if os.path.isfile(f0_file):
        return

    # sr=None keeps the file's native sample rate so it can be verified.
    audio, sr = librosa.load(in_folder + audio_path, sr=None)
    assert sr == 16000, f'expected 16000 Hz audio, got {sr} Hz: {audio_path}'

    # Both pyworld calls receive the same float64 copy of the signal.
    signal = audio.astype(np.float64)

    # A 320-sample hop expressed in milliseconds (20 ms at 16 kHz).
    _f0, t = pw.dio(signal, sr, frame_period=320 / sr * 1000)
    # Refine the DIO estimate; the final frame is dropped.
    # NOTE(review): presumably to match the content feature frame count - confirm.
    f0 = pw.stonemask(signal, _f0, t, sr)[:-1]

    f0 = np.nan_to_num(f0)
    os.makedirs(os.path.dirname(f0_file), exist_ok=True)
    torch.save(torch.tensor(f0), f0_file)
|
|
|
|
|
|
def chunks(arr, m):
    """Deal the items of ``arr`` round-robin into ``m`` lists.

    Item ``k`` (counting from zero in iteration order) goes to list
    ``k % m``, so the list sizes differ by at most one.

    Args:
        arr: Any iterable of items; it is traversed once.
        m: Number of lists to produce.

    Returns:
        A list of ``m`` lists.  Lists stay empty when ``arr`` has fewer than
        ``m`` items.
    """
    result = [[] for _ in range(m)]
    for position, item in enumerate(arr):
        result[position % m].append(item)
    return result
|
|
|
|
|
|
def extract_f0_main(in_folder, audio_paths, save_path):
    """Call ``extract_f0`` for each entry of ``audio_paths`` with a tqdm bar.

    Args:
        in_folder: Forwarded to ``extract_f0`` as the input prefix.
        audio_paths: Iterable of relative audio paths to process in order.
        save_path: Forwarded to ``extract_f0`` as the output root.
    """
    progress = tqdm(audio_paths)
    for relative_path in progress:
        extract_f0(in_folder, relative_path, save_path)
|
|
|
|
|
|
if __name__ == '__main__':
    # Metadata table; its 'path' column holds audio paths relative to
    # ../test_data/.
    df = pd.read_csv('../test_data/vc_meta.csv')
    audio_files = list(df['path'])

    model = get_content_model().to('cuda')

    # Stage 1: resampled audio and content features, one file at a time in
    # this process.
    for in_path in tqdm(audio_files):
        resample_save('../test_data/', in_path, model, save_path='../features/')

    # Stage 2: F0 extraction over the 16 kHz files written by stage 1,
    # spread across worker processes.
    in_folder = '../features/audio_16k/'
    save_path = '../features/'
    cores = 6

    subsets = chunks(audio_files, cores)

    workers = []
    for subset in subsets:
        worker = Process(target=extract_f0_main, args=(in_folder, subset, save_path))
        worker.start()
        workers.append(worker)

    # Wait for every worker explicitly instead of relying on interpreter
    # shutdown to reap them.
    for worker in workers:
        worker.join()