File size: 7,817 Bytes
0dcccdd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
import argparse
import os
import subprocess
from datetime import datetime, timedelta
from pathlib import Path
from multiprocessing import Pool
import pandas as pd
from tqdm import tqdm
from utils.logger import logger
# Bounds (in seconds) on the length of every produced clip. Each bound can be
# overridden through the environment variable of the same name.
MIN_SECONDS = int(os.environ.get("MIN_SECONDS", "3"))
MAX_SECONDS = int(os.environ.get("MAX_SECONDS", "10"))
def get_command(start_time, video_path, video_duration, output_path):
    """Build the FFmpeg argument list that cuts one clip out of a video.

    Args:
        start_time: A ``datetime`` whose time-of-day part is passed to ``-ss``.
        video_path: The input video handed to ``-i``.
        video_duration: The clip length handed to ``-t`` (stringified as is).
        output_path: The destination file of the clip.

    Returns:
        The command as a list of arguments, ready for ``subprocess.run``.
    """
    # The clip is re-encoded (libx264 / aac) rather than stream-copied: this keeps
    # the cut accurate, at the cost of consuming computational resources.
    seek_position = str(start_time.time())
    clip_length = str(video_duration)

    logging_args = ["ffmpeg", "-hide_banner", "-loglevel", "panic"]
    range_args = ["-ss", seek_position, "-i", video_path, "-t", clip_length]
    encoding_args = [
        "-c:v", "libx264",
        "-preset", "veryfast",
        "-crf", "22",
        "-c:a", "aac",
        "-sn",
    ]
    return logging_args + range_args + encoding_args + [output_path]
def clip_video_star(args):
    """Call ``clip_video`` with the positional arguments packed in ``args``.

    This single-argument adapter lets the job tuples be mapped over with
    ``Pool.imap``, which passes exactly one item to the worker function.
    """
    positional = tuple(args)
    return clip_video(*positional)
def _run_clip(command):
    """Run one FFmpeg command, logging (instead of raising) any failure.

    Args:
        command: The argument list to execute, as built by ``get_command``.
    """
    # NOTE(review): a failed run may leave a partial output file behind, which the
    # os.path.exists() checks of a later run would treat as an already finished clip.
    try:
        subprocess.run(command, check=True)
    except Exception as e:
        logger.warning(f"Run {command} error: {e}.")


def _clip_in_chunks(video_path, start_time, end_time, output_folder, clip_stem):
    """Cut [start_time, end_time) into consecutive clips of at most MAX_SECONDS.

    Clips are written to ``<output_folder>/<clip_stem>_<index>.mp4``. A clip whose
    output file already exists is skipped, and a trailing remainder shorter than
    MIN_SECONDS is discarded.
    """
    cur_start = start_time
    chunk_index = 0
    while cur_start < end_time:
        cur_end = min(cur_start + timedelta(seconds=MAX_SECONDS), end_time)
        cur_video_duration = (cur_end - cur_start).total_seconds()
        if cur_video_duration < MIN_SECONDS:
            # A short chunk means cur_end was clamped to end_time, so nothing follows it.
            break
        output_path = os.path.join(output_folder, f"{clip_stem}_{chunk_index}.mp4")
        if os.path.exists(output_path):
            logger.info(f"The clipped video {output_path} exists.")
        else:
            _run_clip(get_command(cur_start, video_path, cur_video_duration, output_path))
        cur_start = cur_end
        chunk_index += 1


def clip_video(video_path, timecode_list, output_folder, video_duration):
    """Clip the video into pieces within the range of [MIN_SECONDS, MAX_SECONDS],
    according to the timecode obtained from cogvideox/video_caption/cutscene_detect.py.

    Args:
        video_path: The path of the video to clip.
        timecode_list: The ``[start, end]`` pairs of the detected scenes, each
            formatted as ``%H:%M:%S.%f``. An empty list means the whole video is
            a single scene.
        output_folder: The folder the clips are written to.
        video_duration: The duration of the whole video formatted as
            ``%H:%M:%S.%f``; only used when ``timecode_list`` is empty.

    Returns:
        None. Failures are logged as warnings and never raised to the caller.
    """
    try:
        video_name = Path(video_path).stem

        if len(timecode_list) == 0:  # The video of a single scene.
            whole_start = datetime.strptime("00:00:00.000", "%H:%M:%S.%f")
            whole_end = datetime.strptime(video_duration, "%H:%M:%S.%f")
            _clip_in_chunks(video_path, whole_start, whole_end, output_folder, video_name)
            return

        for i, timecode in enumerate(timecode_list):  # The video of multiple scenes.
            start_time = datetime.strptime(timecode[0], "%H:%M:%S.%f")
            end_time = datetime.strptime(timecode[1], "%H:%M:%S.%f")
            scene_duration = (end_time - start_time).total_seconds()
            output_path = os.path.join(output_folder, video_name + f"_{i}.mp4")
            if os.path.exists(output_path):
                logger.info(f"The clipped video {output_path} exists.")
                continue
            if scene_duration < MIN_SECONDS:
                continue
            if scene_duration > MAX_SECONDS:
                # Too long for one clip: split it into <video_name>_<i>_<k>.mp4 pieces.
                _clip_in_chunks(video_path, start_time, end_time, output_folder, f"{video_name}_{i}")
                continue
            # We found that the current scene detected by PySceneDetect includes a few frames from
            # the next scene occasionally. Directly discard the last few frames of the current scene.
            # Note that this can leave the clip up to 0.5s shorter than MIN_SECONDS.
            trimmed_duration = scene_duration - 0.5
            # A failure here is logged per clip, so that one bad scene does not
            # prevent the remaining scenes of this video from being clipped.
            _run_clip(get_command(start_time, video_path, trimmed_duration, output_path))
    except Exception as e:
        logger.warning(f"Clip video with {video_path}. Error is: {e}.")
def main():
    """Clip every video listed in a jsonl metadata file using a pool of worker processes.

    The metadata must provide the video path column (see ``--video_path_column``)
    as well as the ``frame_size``, ``timecode_list`` and ``duration`` columns.
    """
    parser = argparse.ArgumentParser(description="Video Splitting")
    parser.add_argument(
        "--video_metadata_path",
        type=str,
        required=True,
        # Only line-delimited JSON is read below, so do not advertise csv.
        help="The path to the video dataset metadata (jsonl).",
    )
    parser.add_argument(
        "--video_path_column",
        type=str,
        default="video_path",
        help="The column contains the video path (an absolute path or a relative path w.r.t the video_folder).",
    )
    parser.add_argument("--video_folder", type=str, default="", help="The video folder.")
    parser.add_argument("--output_folder", type=str, default="outputs")
    parser.add_argument("--n_jobs", type=int, default=16)
    parser.add_argument("--resolution_threshold", type=float, default=0, help="The resolution threshold.")
    args = parser.parse_args()

    video_metadata_df = pd.read_json(args.video_metadata_path, lines=True)
    num_videos = len(video_metadata_df)

    # Drop the videos whose pixel count (product of the two frame_size entries) is too small.
    video_metadata_df["resolution"] = video_metadata_df["frame_size"].apply(lambda x: x[0] * x[1])
    video_metadata_df = video_metadata_df[video_metadata_df["resolution"] >= args.resolution_threshold]
    logger.info(f"Filter {num_videos - len(video_metadata_df)} videos with resolution smaller than {args.resolution_threshold}.")

    video_path_list = video_metadata_df[args.video_path_column].to_list()
    # Clips are named after the file stem, so equal stems end up sharing output names.
    video_id_list = [Path(video_path).stem for video_path in video_path_list]
    if len(video_id_list) != len(set(video_id_list)):
        logger.warning("Duplicate file names exist in the input video path list.")
    video_path_list = [os.path.join(args.video_folder, video_path) for video_path in video_path_list]
    video_timecode_list = video_metadata_df["timecode_list"].to_list()
    video_duration_list = video_metadata_df["duration"].to_list()

    os.makedirs(args.output_folder, exist_ok=True)
    # All three lists come from the same DataFrame, hence they are aligned row by row.
    args_list = [
        (video_path, timecode_list, args.output_folder, video_duration)
        for video_path, timecode_list, video_duration in zip(
            video_path_list, video_timecode_list, video_duration_list
        )
    ]
    with Pool(args.n_jobs) as pool:
        # Drain the lazy iterator so that every job runs and the progress bar advances.
        for _ in tqdm(pool.imap(clip_video_star, args_list), total=len(args_list)):
            pass


if __name__ == "__main__":
    main()