import os
import sys
import time
import multiprocessing

import face_alignment

sys.path.append('..')

from multiprocessing import Pool
from tqdm.auto import tqdm

from Extractor import export_frames
from helpers import *

RUN_PARALLEL = True
PARALLEL_USE_GPU = True
NUM_WORKERS = 12

LRS2_dir = '/home/milselarch/projects/SUTD/50-035/LRS2/lrs2_v1'
videos_dir = os.path.join(LRS2_dir, 'mvlrs_v1/main')
images_dir = os.path.join(LRS2_dir, 'mvlrs_v1/main_images')

# split files listing the video group names ("<group_id>/<video_id>")
# that are actually used for training, validation and testing
usable_sets_paths = [
    '../data/LRS2_CTC2_train.txt',
    '../data/LRS2_CTC2_val.txt',
    '../data/LRS2_CTC2_test.txt'
]

USABLE_VIDEO_GROUP_NAMES = []
for usable_sets_path in usable_sets_paths:
    with open(usable_sets_path) as split_file:
        USABLE_VIDEO_GROUP_NAMES.extend([
            x.strip() for x in split_file.readlines()
        ])

USABLE_VIDEO_GROUP_NAMES = set(USABLE_VIDEO_GROUP_NAMES)
lock = multiprocessing.Lock()

# collect (group_id, filename) pairs for every .mp4 video
# that belongs to one of the usable splits
group_ids = os.listdir(videos_dir)
video_paths = []

for group_id in group_ids:
    group_dirpath = os.path.join(videos_dir, group_id)
    filenames = os.listdir(group_dirpath)

    for filename in filenames:
        if not filename.endswith('.mp4'):
            continue

        basename, _ = os.path.splitext(filename)
        video_group_name = f'{group_id}/{basename}'
        if video_group_name not in USABLE_VIDEO_GROUP_NAMES:
            continue

        video_paths.append((group_id, filename))


def process_video(video_path, video_images_dir, use_gpu):
    # extract cropped face frames from a single video
    # into its dedicated image directory
    export_frames(
        video_path, video_images_dir,
        recycle_landmarks=True, use_gpu=use_gpu,
    )
    # print('PROC', video_path)
    # print('PROCESS_END', group_id, video_filename)
    return video_path, video_images_dir


def callback(result):
    # advance the shared progress bar whenever a worker finishes a video
    pbar.desc = str(result)
    pbar.update(1)
    # pbar.refresh()
    # shutil.rmtree(vid_temp_dir)


if RUN_PARALLEL:
    pbar = tqdm(total=len(video_paths))
    pool = Pool(processes=NUM_WORKERS)
    jobs = []

    for data_pair in video_paths:
        group_id, video_filename = data_pair
        pbar.desc = str(data_pair)

        basename, _ = os.path.splitext(video_filename)
        temp_id = f'{group_id}-{basename}'
        # vid_temp_dir = os.path.join(temp_dir, temp_id)

        video_path = os.path.join(videos_dir, group_id, video_filename)
        group_images_dir = os.path.join(images_dir, group_id)
        video_images_dir = os.path.join(group_images_dir, basename)

        # create the output directories up front in the main process
        # so the workers never race to create them
        if not os.path.exists(group_images_dir):
            os.mkdir(group_images_dir)
        if not os.path.exists(video_images_dir):
            os.mkdir(video_images_dir)

        job_kwargs = kwargify(
            video_path=video_path,
            video_images_dir=video_images_dir,
            use_gpu=PARALLEL_USE_GPU
        )
        job = pool.apply_async(
            process_video, kwds=job_kwargs,
            callback=callback
        )
        jobs.append(job)

    print('ALL JOBS ADDED', len(jobs))

    # wait for all tasks to complete
    for job in jobs:
        job.wait()

    pool.close()
    pool.join()

else:
    # sequential fallback: process each video in the main process
    pbar = tqdm(video_paths)

    for data_pair in pbar:
        group_id, video_filename = data_pair
        basename, _ = os.path.splitext(video_filename)
        temp_id = f'{group_id}-{basename}'
        # vid_temp_dir = os.path.join(temp_dir, temp_id)

        video_path = os.path.join(videos_dir, group_id, video_filename)
        group_images_dir = os.path.join(images_dir, group_id)
        video_images_dir = os.path.join(group_images_dir, basename)

        if not os.path.exists(group_images_dir):
            os.mkdir(group_images_dir)
        if not os.path.exists(video_images_dir):
            os.mkdir(video_images_dir)

        process_video(
            video_path=video_path,
            video_images_dir=video_images_dir,
            use_gpu=True
        )

print('TOTAL VIDEOS', len(video_paths))
# print('COMPLETE EXTRACTED VIDEOS', complete_extractions)
print('>>>')