# torchnet/scripts/extract_crop_lips_v2.py
# (page-header residue from the hosting site: author milselarch,
#  commit "push to main", revision df07554 — kept here as a comment
#  so the file remains valid Python)
import sys
import time
import face_alignment
sys.path.append('..')
import multiprocessing
from multiprocessing import Pool
from tqdm.auto import tqdm
from Extractor import export_frames
from helpers import *
# --- extraction configuration -------------------------------------------
RUN_PARALLEL = True       # fan videos out over a multiprocessing Pool
PARALLEL_USE_GPU = True   # passed through to export_frames in worker procs
NUM_WORKERS = 12          # pool size for parallel extraction

# LRS2 dataset layout: raw videos in mvlrs_v1/main, extracted frames
# written to mvlrs_v1/main_images (mirroring the group/video structure).
LRS2_dir = '/home/milselarch/projects/SUTD/50-035/LRS2/lrs2_v1'
videos_dir = os.path.join(LRS2_dir, 'mvlrs_v1/main')
images_dir = os.path.join(LRS2_dir, 'mvlrs_v1/main_images')

# split files, one "<group_id>/<video_basename>" entry per line
usable_sets_paths = [
    '../data/LRS2_CTC2_train.txt',
    '../data/LRS2_CTC2_val.txt',
    '../data/LRS2_CTC2_test.txt'
]

# Union of all train/val/test entries; membership test gates which
# videos get processed.  A set gives O(1) lookups in the scan below.
USABLE_VIDEO_GROUP_NAMES = set()
for usable_sets_path in usable_sets_paths:
    # context manager so each split file is closed deterministically
    # (the original open(...).readlines() leaked the file handle)
    with open(usable_sets_path) as split_file:
        USABLE_VIDEO_GROUP_NAMES.update(
            line.strip() for line in split_file
        )

lock = multiprocessing.Lock()
# Scan the dataset for every .mp4 clip whose "<group>/<basename>" id is in
# one of the usable splits; collect (group_id, filename) pairs to process.
group_ids = os.listdir(videos_dir)
video_paths = []

for group_id in group_ids:
    group_dirpath = os.path.join(videos_dir, group_id)
    for filename in os.listdir(group_dirpath):
        if not filename.endswith('.mp4'):
            continue

        stem = os.path.splitext(filename)[0]
        if f'{group_id}/{stem}' in USABLE_VIDEO_GROUP_NAMES:
            video_paths.append((group_id, filename))
def process_video(
    video_path, video_images_dir, use_gpu
):
    """Extract frames for one video into its images directory.

    Thin wrapper around Extractor.export_frames; the returned
    (video_path, video_images_dir) pair is forwarded to the pool
    callback so progress can be reported per-video.
    """
    export_frames(
        video_path,
        video_images_dir,
        recycle_landmarks=True,
        use_gpu=use_gpu,
    )
    return video_path, video_images_dir
def callback(result):
    # Pool completion callback: `result` is the (video_path,
    # video_images_dir) tuple returned by process_video.  Runs in the
    # parent process, so mutating the module-level `pbar` is safe here.
    pbar.desc = str(result)
    pbar.update(1)
    # pbar.refresh()
    # shutil.rmtree(vid_temp_dir)
if RUN_PARALLEL:
    # Fan each video out to a worker pool; `callback` advances the bar
    # as jobs finish.
    pbar = tqdm(video_paths)
    pool = Pool(processes=NUM_WORKERS)
    jobs = []

    for data_pair in video_paths:
        group_id, video_filename = data_pair
        pbar.desc = str(data_pair)
        basename, _ = os.path.splitext(video_filename)
        video_path = os.path.join(videos_dir, group_id, video_filename)
        group_images_dir = os.path.join(images_dir, group_id)
        video_images_dir = os.path.join(group_images_dir, basename)
        # makedirs(exist_ok=True) replaces the racy exists()/mkdir()
        # pair and also creates images_dir itself if it is missing
        # (plain os.mkdir fails when the parent does not exist).
        os.makedirs(video_images_dir, exist_ok=True)

        job_kwargs = kwargify(
            video_path=video_path,
            video_images_dir=video_images_dir,
            use_gpu=PARALLEL_USE_GPU
        )
        job = pool.apply_async(
            process_video, kwds=job_kwargs,
            callback=callback
        )
        jobs.append(job)

    print('ALL JOBS ADDED', len(jobs))
    # Wait for all tasks to complete.  get() (unlike wait()) re-raises
    # any exception the worker hit, so failures are no longer silent.
    for job in jobs:
        job.get()
    pool.close()
    pool.join()
else:
pbar = tqdm(video_paths)
for data_pair in pbar:
group_id, video_filename = data_pair
basename, _ = os.path.splitext(video_filename)
temp_id = f'{group_id}-{basename}'
# vid_temp_dir = os.path.join(temp_dir, temp_id)
video_path = os.path.join(videos_dir, group_id, video_filename)
group_images_dir = os.path.join(images_dir, group_id)
video_images_dir = os.path.join(group_images_dir, basename)
if not os.path.exists(group_images_dir):
os.mkdir(group_images_dir)
if not os.path.exists(video_images_dir):
os.mkdir(video_images_dir)
process_video(
video_path=video_path,
video_images_dir=video_images_dir,
use_gpu=True
)
# summary line: count of videos that matched the usable splits
print('TOTAL VIDEOS', len(video_paths))
# print('COMPLETE EXTRACTED VIDEOS', complete_extractions)
print('>>>')