# Scraped file-viewer header (not part of the program): file size 3,854 bytes, id e34aada.
import os
import numpy as np
from scipy.misc import face
import torch
from tqdm import trange
import pickle
from copy import deepcopy
from data_util.face3d_helper import Face3DHelper
from utils.commons.indexed_datasets import IndexedDataset, IndexedDatasetBuilder
def load_video_npy(fn):
    """Load the fitted face coefficients of one video from a ``.npy`` file.

    The file is expected to hold a pickled dict; only the four coefficient
    entries are kept, any other entry is dropped.

    Args:
        fn: Path to the file; must end with ``"_coeff_fit_mp.npy"``.

    Returns:
        A new dict with the keys ``'euler'``, ``'trans'``, ``'id'`` and
        ``'exp'`` (in that order), taken unchanged from the stored dict.

    Raises:
        AssertionError: If ``fn`` does not have the expected suffix.
        KeyError: If one of the four entries is missing from the file.
    """
    assert fn.endswith("_coeff_fit_mp.npy")
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted files.
    stored = np.load(fn, allow_pickle=True).item()
    # Shapes per the original annotations (not checked here):
    # euler [T, 3], trans [T, 3], id [T, 80], exp [T, 64].
    wanted_keys = ('euler', 'trans', 'id', 'exp')
    return {key: stored[key] for key in wanted_keys}
def cal_lm3d_in_video_dict(video_dict, face3d_helper):
    """Compute landmarks from identity/expression and store them in place.

    Calls ``face3d_helper.reconstruct_idexp_lm3d`` with ``video_dict['id']``
    and ``video_dict['exp']``, moves the result to CPU, converts it to a
    NumPy array and writes it to ``video_dict['idexp_lm3d']``.

    Args:
        video_dict: Dict holding at least the ``'id'`` and ``'exp'`` entries;
            it is modified in place.
        face3d_helper: Object exposing ``reconstruct_idexp_lm3d(id, exp)``
            whose result supports ``.cpu().numpy()``.

    Returns:
        None. The result is only available through ``video_dict``.
    """
    lm3d_tensor = face3d_helper.reconstruct_idexp_lm3d(video_dict['id'], video_dict['exp'])
    video_dict['idexp_lm3d'] = lm3d_tensor.cpu().numpy()
def load_audio_npy(fn):
    """Load the mel-spectrogram and f0 features of one clip from a ``.npy`` file.

    The file is expected to hold a pickled dict; only the ``'mel'`` and
    ``'f0'`` entries are kept.

    Args:
        fn: Path to the file; must end with ``".npy"``.

    Returns:
        A new dict with the keys ``"mel"`` and ``"f0"`` (in that order),
        taken unchanged from the stored dict.

    Raises:
        AssertionError: If ``fn`` does not end with ``".npy"``.
        KeyError: If ``'mel'`` or ``'f0'`` is missing from the file.
    """
    assert fn.endswith(".npy")
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted files.
    stored = np.load(fn, allow_pickle=True).item()
    # Shapes per the original annotations (not checked here): mel [T, 80], f0 [T, 1].
    return {key: stored[key] for key in ("mel", "f0")}
def _in_split(index, prefix):
    """Return True when the video at position *index* belongs to split *prefix*.

    Every 100th video (``index % 100 == 0``) is held out: ``'train'`` takes
    all the other videos, any other prefix (i.e. ``'val'``) takes only the
    held-out ones.
    """
    held_out = index % 100 == 0
    return (not held_out) if prefix == 'train' else held_out


def _sibling_path(mp4_name, subdir, suffix):
    """Swap the '/video/' directory for *subdir* and the '.mp4' extension for *suffix*."""
    return mp4_name.replace("/video/", f"/{subdir}/").replace(".mp4", suffix)


# Script entry point: packs the per-video features found next to the raw
# videos into one indexed dataset per split ('val' and 'train') under
# data/binary/th1kh. Items with missing or inconsistent features are skipped.
if __name__ == '__main__':
    import glob
    import tqdm

    # NOTE(review): this helper is constructed but never used below
    # (cal_lm3d_in_video_dict is not called); kept in case its construction
    # is relied upon -- confirm and remove if not.
    face3d_helper = Face3DHelper(use_gpu=False)

    prefixs = ['val', 'train']
    binarized_ds_path = "data/binary/th1kh"
    os.makedirs(binarized_ds_path, exist_ok=True)

    raw_base_dir = '/mnt/bn/ailabrenyi/entries/yezhenhui/datasets/raw/TH1KH_512/video'
    # glob returns paths in arbitrary order. Sorting makes both the 1000-file
    # truncation and the index-based train/val assignment reproducible, and the
    # listing is done once because it does not depend on the split.
    mp4_names = sorted(glob.glob(os.path.join(raw_base_dir, '*.mp4')))[:1000]

    for prefix in prefixs:
        databuilder = IndexedDatasetBuilder(
            os.path.join(binarized_ds_path, prefix),
            gzip=False,
            default_idx_size=1024 * 1024 * 1024 * 2,
        )
        cnt = 0   # candidates that belong to this split
        scnt = 0  # items actually written to the dataset
        pbar = tqdm.tqdm(enumerate(mp4_names), total=len(mp4_names))
        for i, mp4_name in pbar:
            if not _in_split(i, prefix):
                continue
            # Count only this split's candidates so the success rate is
            # not diluted by videos that belong to the other split.
            cnt += 1

            hubert_npy_name = _sibling_path(mp4_name, "hubert", "_hubert.npy")
            audio_npy_name = _sibling_path(mp4_name, "mel_f0", "_mel_f0.npy")
            video_npy_name = _sibling_path(mp4_name, "coeff_fit_mp", "_coeff_fit_mp.npy")
            if not os.path.exists(audio_npy_name):
                print("Skip item for audio npy not found.")
                continue
            if not os.path.exists(video_npy_name):
                print("Skip item for video npy not found.")
                continue
            if not os.path.exists(hubert_npy_name):
                print("Skip item for hubert_npy not found.")
                continue

            audio_dict = load_audio_npy(audio_npy_name)
            hubert = np.load(hubert_npy_name)
            video_dict = load_video_npy(video_npy_name)

            # The number of files in the image directory must match the number
            # of fitted frames, otherwise the item is dropped.
            com_img_dir = _sibling_path(mp4_name, "com_imgs", "")
            num_com_imgs = len(glob.glob(os.path.join(com_img_dir, '*')))
            num_frames = len(video_dict['exp'])
            if num_com_imgs != num_frames:
                print("Skip item for length mismatch.")
                continue

            mel = audio_dict['mel']
            if mel.shape[0] < 32:  # original note: the video is shorter than 0.6s
                print("Skip item for too short.")
                continue

            # Merge audio and video features into a single item record.
            audio_dict.update(video_dict)
            audio_dict['item_id'] = os.path.basename(mp4_name)[:-4]
            audio_dict['hubert'] = hubert  # original note: [T_x, hid=1024]
            audio_dict['img_dir'] = com_img_dir
            databuilder.add_item(audio_dict)
            scnt += 1
            pbar.set_postfix({'success': scnt, 'success rate': scnt / cnt})
        databuilder.finalize()
        # Report the number of items actually written, not the files scanned.
        print(f"{prefix} set has {scnt} samples!")