File size: 3,854 Bytes
e34aada
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import numpy as np
from scipy.misc import face
import torch
from tqdm import trange
import pickle
from copy import deepcopy

from data_util.face3d_helper import Face3DHelper
from utils.commons.indexed_datasets import IndexedDataset, IndexedDatasetBuilder


def load_video_npy(fn):
    """Load the per-frame coefficient dict stored in a ``*_coeff_fit_mp.npy`` file.

    The file is expected to hold a single pickled dict. Only the ``euler``,
    ``trans``, ``id`` and ``exp`` entries are copied (in that order) into the
    returned dict; any other entries are dropped.

    Raises AssertionError if ``fn`` does not end with ``_coeff_fit_mp.npy`` and
    KeyError if one of the four entries is missing.
    """
    assert fn.endswith("_coeff_fit_mp.npy")
    # NOTE: allow_pickle=True unpickles arbitrary objects; only use on trusted files.
    coeffs = np.load(fn, allow_pickle=True).item()
    # Shapes per the original annotations (not checked here):
    # euler [T, 3], trans [T, 3], id [T, 80], exp [T, 64].
    kept_keys = ('euler', 'trans', 'id', 'exp')
    return {key: coeffs[key] for key in kept_keys}

def cal_lm3d_in_video_dict(video_dict, face3d_helper):
    """Add an ``idexp_lm3d`` entry to ``video_dict`` in place.

    The ``id`` and ``exp`` entries of ``video_dict`` are passed to
    ``face3d_helper.reconstruct_idexp_lm3d``; the result is moved to CPU,
    converted to a NumPy array and stored under ``video_dict['idexp_lm3d']``.
    Nothing is returned.
    """
    lm3d = face3d_helper.reconstruct_idexp_lm3d(video_dict['id'], video_dict['exp'])
    video_dict['idexp_lm3d'] = lm3d.cpu().numpy()


def load_audio_npy(fn):
    """Load the audio feature dict stored in a ``.npy`` file.

    The file is expected to hold a single pickled dict. Only the ``mel`` and
    ``f0`` entries are copied (in that order) into the returned dict.

    Raises AssertionError if ``fn`` does not end with ``.npy`` and KeyError if
    either entry is missing.
    """
    assert fn.endswith(".npy")
    # NOTE: allow_pickle=True unpickles arbitrary objects; only use on trusted files.
    features = np.load(fn, allow_pickle=True).item()
    # Shapes per the original annotations (not checked here): mel [T, 80], f0 [T, 1].
    kept_keys = ("mel", "f0")
    return {key: features[key] for key in kept_keys}


if __name__ == '__main__':
    # Script entry point: packs the per-clip audio/video feature files that sit
    # next to each raw mp4 into two indexed datasets ("val" and "train") under
    # `binarized_ds_path`.
    #
    # NOTE(review): face3d_helper is constructed but not used anywhere in this
    # block (cal_lm3d_in_video_dict is never called here). It is kept because
    # its construction may have side effects -- confirm and remove if not.
    face3d_helper = Face3DHelper(use_gpu=False)

    import glob
    import tqdm

    prefixs = ['val', 'train']
    binarized_ds_path = "data/binary/th1kh"
    raw_base_dir = '/mnt/bn/ailabrenyi/entries/yezhenhui/datasets/raw/TH1KH_512/video'
    os.makedirs(binarized_ds_path, exist_ok=True)

    # glob returns paths in arbitrary order. List and sort once so that both
    # splits index the very same sequence: the `i % 100` partition below is
    # then reproducible across runs and train/val are guaranteed disjoint.
    mp4_names = sorted(glob.glob(os.path.join(raw_base_dir, '*.mp4')))
    mp4_names = mp4_names[:1000]  # only the first 1000 clips are binarized

    for prefix in prefixs:
        databuilder = IndexedDatasetBuilder(os.path.join(binarized_ds_path, prefix), gzip=False, default_idx_size=1024*1024*1024*2)
        cnt = 0   # clips that belong to this split
        scnt = 0  # clips of this split that were actually written
        pbar = tqdm.tqdm(enumerate(mp4_names), total=len(mp4_names))
        for i, mp4_name in pbar:
            # Every 100th clip goes to the non-train split; the rest go to train.
            is_holdout = i % 100 == 0
            if is_holdout != (prefix != 'train'):
                continue
            # Count only clips of the current split so that the success rate
            # shown in the progress bar is relative to this split.
            cnt += 1

            # Feature files live in sibling directories of "/video/".
            hubert_npy_name = mp4_name.replace("/video/", "/hubert/").replace(".mp4", "_hubert.npy")
            audio_npy_name = mp4_name.replace("/video/", "/mel_f0/").replace(".mp4", "_mel_f0.npy")
            video_npy_name = mp4_name.replace("/video/", "/coeff_fit_mp/").replace(".mp4", "_coeff_fit_mp.npy")
            com_img_dir = mp4_name.replace("/video/", "/com_imgs/").replace(".mp4", "")

            if not os.path.exists(audio_npy_name):
                print("Skip item for audio npy not found.")
                continue
            if not os.path.exists(video_npy_name):
                print("Skip item for video npy not found.")
                continue
            if not os.path.exists(hubert_npy_name):
                print("Skip item for hubert_npy not found.")
                continue

            # The number of files in the image directory must match the number
            # of fitted frames.
            video_dict = load_video_npy(video_npy_name)
            num_com_imgs = len(glob.glob(os.path.join(com_img_dir, '*')))
            num_frames = len(video_dict['exp'])
            if num_com_imgs != num_frames:
                print("Skip item for length mismatch.")
                continue

            audio_dict = load_audio_npy(audio_npy_name)
            mel = audio_dict['mel']
            if mel.shape[0] < 32:  # original note: the video is shorter than 0.6s
                print("Skip item for too short.")
                continue

            # Load the hubert features only once the clip is known to be kept.
            hubert = np.load(hubert_npy_name)

            # Merge everything into one flat item dict.
            audio_dict.update(video_dict)
            audio_dict['item_id'] = os.path.basename(mp4_name)[:-4]  # strip ".mp4"
            audio_dict['hubert'] = hubert  # [T_x, hid=1024] per original note
            audio_dict['img_dir'] = com_img_dir

            databuilder.add_item(audio_dict)
            scnt += 1
            pbar.set_postfix({'success': scnt, 'success rate': scnt / cnt})
        databuilder.finalize()
        # Report the number of items actually written, not the number scanned.
        print(f"{prefix} set has {scnt} samples!")