File size: 8,472 Bytes
19fe404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import ast
import argparse
import gc
import os
from contextlib import contextmanager
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from natsort import natsorted
from tqdm import tqdm

from utils.logger import logger
from utils.video_utils import get_video_path_list


@contextmanager
def VideoCapture(video_path):
    """Open ``video_path`` with ``cv2.VideoCapture`` and always release it.

    Yields the capture object to the ``with`` body. On exit (normal or via
    an exception) the capture is released, the local reference is dropped
    and a garbage collection pass is triggered.
    """
    capture = cv2.VideoCapture(video_path)
    try:
        yield capture
    finally:
        # Release the native handle first, then drop the reference and
        # force a collection pass.
        capture.release()
        del capture
        gc.collect()


def compute_motion_score(video_path, sampling_fps=2):
    """Compute the mean dense optical-flow magnitude of a video.

    Frames are sampled at roughly ``sampling_fps`` frames per second. For each
    pair of consecutive sampled frames, the Farneback optical flow is computed
    on the grayscale images and the mean flow magnitude is recorded. The motion
    score of the video is the mean over all recorded pairs.

    Args:
        video_path: Path to the video file.
        sampling_fps: Target number of sampled frames per second (values below
            1 are treated as 1). Defaults to 2.

    Returns:
        A dict with the keys ``"video_path"`` (the file name of the video) and
        ``"motion_score"`` (rounded to 5 decimals), or ``None`` when the score
        cannot be computed (the error is printed).
    """
    video_motion_scores = []

    try:
        with VideoCapture(video_path) as cap:
            if not cap.isOpened():
                raise ValueError("the video cannot be opened")

            fps = cap.get(cv2.CAP_PROP_FPS)
            # `not fps > 0` also rejects NaN, which would otherwise propagate.
            if not fps > 0:
                raise ValueError(f"invalid FPS {fps}")

            valid_fps = min(max(sampling_fps, 1), fps)
            frame_interval = int(fps / valid_fps)
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # if cannot get the second frame, use the last one
            # (only when the reported frame count is usable; some containers
            # report 0 or a negative value).
            if total_frames > 1:
                frame_interval = min(frame_interval, total_frames - 1)
            # The interval is used as a modulus below, so it must stay >= 1.
            frame_interval = max(frame_interval, 1)

            prev_frame = None
            frame_count = -1
            while cap.isOpened():
                ret, frame = cap.read()
                frame_count += 1

                if not ret:
                    break

                # skip middle frames
                if frame_count % frame_interval != 0:
                    continue

                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if prev_frame is None:
                    prev_frame = gray_frame
                    continue

                flow = cv2.calcOpticalFlowFarneback(
                    prev_frame,
                    gray_frame,
                    None,
                    pyr_scale=0.5,
                    levels=3,
                    winsize=15,
                    iterations=3,
                    poly_n=5,
                    poly_sigma=1.2,
                    flags=0,
                )
                mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
                frame_motion_score = np.mean(mag)
                video_motion_scores.append(frame_motion_score)
                prev_frame = gray_frame

            # Without at least one frame pair the mean would be NaN; report
            # the video as failed instead of emitting a meaningless score.
            if not video_motion_scores:
                raise ValueError("fewer than two frames could be sampled")

            video_meta_info = {
                "video_path": Path(video_path).name,
                "motion_score": round(float(np.mean(video_motion_scores)), 5),
            }
            return video_meta_info

    except Exception as e:
        # Best effort: a broken video must not abort the whole batch.
        print(f"Compute motion score for video {video_path} with error: {e}.")
        return None


def parse_args(argv=None):
    """Parse the command-line options of the motion score script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace``. The aesthetic options are stored
        under the historical attribute names ``asethetic_score_metadata_path``
        and ``asethetic_score_threshold``.
    """
    parser = argparse.ArgumentParser(description="Compute the motion score of the videos.")
    parser.add_argument("--video_folder", type=str, default="", help="The video folder.")
    parser.add_argument(
        "--video_metadata_path", type=str, default=None, help="The path to the video dataset metadata (csv/jsonl)."
    )
    parser.add_argument(
        "--video_path_column",
        type=str,
        default="video_path",
        help="The column contains the video path (an absolute path or a relative path w.r.t the video_folder).",
    )
    parser.add_argument("--saved_path", type=str, required=True, help="The save path to the output results (csv/jsonl).")
    parser.add_argument("--saved_freq", type=int, default=100, help="The frequency to save the output results.")
    parser.add_argument("--n_jobs", type=int, default=1, help="The number of concurrent processes.")

    # The original flags are misspelled ("asethetic"). They are kept for
    # backward compatibility and the correctly spelled flags are accepted as
    # aliases; `dest` pins the attribute names existing callers rely on.
    parser.add_argument(
        "--asethetic_score_metadata_path",
        "--aesthetic_score_metadata_path",
        dest="asethetic_score_metadata_path",
        type=str,
        default=None,
        help="The path to the video quality metadata (csv/jsonl).",
    )
    parser.add_argument(
        "--asethetic_score_threshold",
        "--aesthetic_score_threshold",
        dest="asethetic_score_threshold",
        type=float,
        default=4.0,
        help="The aesthetic score threshold.",
    )
    parser.add_argument(
        "--text_score_metadata_path", type=str, default=None, help="The path to the video text score metadata (csv/jsonl)."
    )
    parser.add_argument("--text_score_threshold", type=float, default=0.02, help="The text threshold.")

    args = parser.parse_args(argv)
    return args


def _read_metadata(metadata_path):
    """Load a csv/jsonl metadata file into a DataFrame (ValueError for other extensions)."""
    if metadata_path.endswith(".csv"):
        return pd.read_csv(metadata_path)
    if metadata_path.endswith(".jsonl"):
        return pd.read_json(metadata_path, lines=True)
    raise ValueError(f"The metadata path {metadata_path} must end with .csv or .jsonl.")


def _exclude_videos(video_path_list, excluded_video_paths, video_folder):
    """Remove the excluded videos (joined with video_folder) and return the rest naturally sorted."""
    excluded = {os.path.join(video_folder, video_path) for video_path in excluded_video_paths}
    remaining = list(set(video_path_list).difference(excluded))
    # Sorting to guarantee the same result for each process.
    return natsorted(remaining)


def main():
    """Compute motion scores for the selected videos and append them to ``saved_path``.

    Steps:
        1. Collect the candidate videos with ``get_video_path_list``.
        2. Skip the videos already present in ``saved_path`` (resume).
        3. Optionally drop videos whose mean aesthetic score is below the
           aesthetic threshold, and videos whose text score is above the text
           threshold.
        4. Process the remaining videos in chunks of ``saved_freq`` and append
           each chunk of results to ``saved_path`` (csv or jsonl).

    Raises:
        ValueError: If ``saved_path`` or a metadata path does not end with
            ``.csv``/``.jsonl``, or if ``saved_freq`` is not positive.
    """
    args = parse_args()

    # Validate the cheap options before doing any heavy work.
    if not args.saved_path.endswith((".csv", ".jsonl")):
        raise ValueError("The saved_path must end with .csv or .jsonl.")
    if args.saved_freq <= 0:
        raise ValueError("The saved_freq must be a positive integer.")

    video_path_list = get_video_path_list(
        video_folder=args.video_folder,
        video_metadata_path=args.video_metadata_path,
        video_path_column=args.video_path_column,
    )

    # Resume: an existing but empty file holds no results (and cannot be parsed as csv).
    if os.path.exists(args.saved_path) and os.path.getsize(args.saved_path) > 0:
        saved_metadata_df = _read_metadata(args.saved_path)
        # Results are written under the "video_path" key, which may differ
        # from the column name used by the input metadata.
        if args.video_path_column in saved_metadata_df.columns:
            saved_column = args.video_path_column
        else:
            saved_column = "video_path"
        saved_video_path_list = saved_metadata_df[saved_column].tolist()

        video_path_list = _exclude_videos(video_path_list, saved_video_path_list, args.video_folder)
        logger.info(f"Resume from {args.saved_path}: {len(saved_video_path_list)} processed and {len(video_path_list)} to be processed.")

    if args.asethetic_score_metadata_path is not None:
        asethetic_score_df = _read_metadata(args.asethetic_score_metadata_path)

        # In pandas, csv will save lists as strings, whereas jsonl will not.
        asethetic_score_df["aesthetic_score"] = asethetic_score_df["aesthetic_score"].apply(
            lambda x: ast.literal_eval(x) if isinstance(x, str) else x
        )
        asethetic_score_df["aesthetic_score_mean"] = asethetic_score_df["aesthetic_score"].apply(lambda x: sum(x) / len(x))
        # Videos below the threshold are the ones to drop.
        filtered_asethetic_score_df = asethetic_score_df[asethetic_score_df["aesthetic_score_mean"] < args.asethetic_score_threshold]
        filtered_video_path_list = filtered_asethetic_score_df[args.video_path_column].tolist()

        video_path_list = _exclude_videos(video_path_list, filtered_video_path_list, args.video_folder)
        logger.info(f"Load {args.asethetic_score_metadata_path} and filter {len(filtered_video_path_list)} videos.")

    if args.text_score_metadata_path is not None:
        text_score_df = _read_metadata(args.text_score_metadata_path)

        # Videos above the threshold are the ones to drop.
        filtered_text_score_df = text_score_df[text_score_df["text_score"] > args.text_score_threshold]
        filtered_video_path_list = filtered_text_score_df[args.video_path_column].tolist()

        video_path_list = _exclude_videos(video_path_list, filtered_video_path_list, args.video_folder)
        logger.info(f"Load {args.text_score_metadata_path} and filter {len(filtered_video_path_list)} videos.")

    for i in tqdm(range(0, len(video_path_list), args.saved_freq)):
        result_list = Parallel(n_jobs=args.n_jobs, backend="threading")(
            delayed(compute_motion_score)(video_path) for video_path in tqdm(video_path_list[i: i + args.saved_freq])
        )
        # Failed videos are reported as None by compute_motion_score.
        result_list = [result for result in result_list if result is not None]
        if not result_list:
            continue

        result_df = pd.DataFrame(result_list)
        if args.saved_path.endswith(".csv"):
            # Write the header only when starting a new (or still empty) file.
            header = not (os.path.exists(args.saved_path) and os.path.getsize(args.saved_path) > 0)
            result_df.to_csv(args.saved_path, header=header, index=False, mode="a")
        elif args.saved_path.endswith(".jsonl"):
            result_df.to_json(args.saved_path, orient="records", lines=True, mode="a")
        logger.info(f"Save result to {args.saved_path}.")


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()