File size: 4,519 Bytes
19fe404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import gc
import os
import random
import urllib.request as request
from contextlib import contextmanager
from pathlib import Path
from typing import List, Tuple, Optional

import numpy as np
import pandas as pd
from decord import VideoReader
from PIL import Image

# File suffixes (lower-case, with the leading dot) that are treated as video files.
ALL_VIDEO_EXT = {".mp4", ".webm", ".mkv", ".avi", ".flv", ".mov"}


def get_video_path_list(
    video_folder: Optional[str] = None,
    video_metadata_path: Optional[str] = None,
    video_path_column: Optional[str] = None,
) -> List[str]:
    """Collect video paths from a metadata file, a single file, or a folder.

    The sources are tried in this order:

    1. ``video_metadata_path`` (``.csv`` / ``.jsonl`` / ``.txt``). For csv and
       jsonl the paths are read from ``video_path_column``; for txt every
       non-blank line is one path. If ``video_folder`` is also given, each
       entry is joined onto it with ``os.path.join``.
    2. ``video_folder`` pointing at an existing file: a video file is returned
       as a one-element list, a ``txt`` file is read as a list of paths (one
       per non-blank line), anything else yields an empty list.
    3. ``video_folder`` as a directory: it is searched recursively for files
       whose name ends with one of ``ALL_VIDEO_EXT``.

    Args:
        video_folder (str): The folder (including sub-folders) containing the
            video files, or a single video / txt file.
        video_metadata_path (str): The path of the video metadata file
            containing the video path list.
        video_path_column (str): The column/key holding the video path in the
            video metadata file (required for csv/jsonl).

    Returns:
        The list of video paths as strings. The order of a folder scan is not
        guaranteed.

    Raises:
        ValueError: If neither source is given, if ``video_path_column`` is
            missing for a csv/jsonl file, or if the metadata file has an
            unsupported extension.
    """
    if video_folder is None and video_metadata_path is None:
        raise ValueError("Either the video_folder or the video_metadata_path should be specified.")

    if video_metadata_path is not None:
        if video_metadata_path.endswith(".csv"):
            if video_path_column is None:
                raise ValueError("The video_path_column can not be None if provided a csv file.")
            metadata_df = pd.read_csv(video_metadata_path)
            video_path_list = metadata_df[video_path_column].tolist()
        elif video_metadata_path.endswith(".jsonl"):
            if video_path_column is None:
                raise ValueError("The video_path_column can not be None if provided a jsonl file.")
            metadata_df = pd.read_json(video_metadata_path, lines=True)
            video_path_list = metadata_df[video_path_column].tolist()
        elif video_metadata_path.endswith(".txt"):
            with open(video_metadata_path, "r", encoding="utf-8") as f:
                stripped_lines = (line.strip() for line in f)
                # Skip blank lines: an empty entry would otherwise be joined
                # into the bare folder path below.
                video_path_list = [path for path in stripped_lines if path]
        else:
            raise ValueError("The video_metadata_path must end with `.csv`, `.jsonl` or `.txt`.")
        if video_folder is not None:
            video_path_list = [os.path.join(video_folder, video_path) for video_path in video_path_list]
        return video_path_list

    # From here on video_folder cannot be None (guarded by the first check).
    if os.path.isfile(video_folder):
        if video_folder.endswith("mp4"):
            return [video_folder]
        if video_folder.endswith("txt"):
            with open(video_folder, "r", encoding="utf-8") as f:
                stripped_lines = (line.strip() for line in f)
                return [path for path in stripped_lines if path]
        # Accept the same extensions for a single file as for a folder scan.
        if Path(video_folder).suffix.lower() in ALL_VIDEO_EXT:
            return [video_folder]
        return []

    root = Path(video_folder)
    return [str(video_path) for ext in ALL_VIDEO_EXT for video_path in root.rglob(f"*{ext}")]


@contextmanager
def video_reader(*args, **kwargs):
    """Open a decord ``VideoReader`` for the duration of a ``with`` block.

    All positional and keyword arguments are forwarded to ``VideoReader``.
    When the block exits (normally or through an exception) the local
    reference to the reader is dropped and a garbage collection is forced,
    which is intended as a workaround for the memory leak of decord.
    """
    reader = VideoReader(*args, **kwargs)
    try:
        yield reader
    finally:
        # Release our reference first so the collection below can reclaim it.
        del reader
        gc.collect()


def extract_frames(
    video_path: str, sample_method: str = "mid", num_sampled_frames: int = -1, sample_stride: int = -1
) -> Optional[Tuple[List[int], List[Image.Image]]]:
    """Sample frames from a video and return them as PIL images.

    Args:
        video_path: The video to read; forwarded to ``video_reader``.
        sample_method: One of

            * ``"mid"``: the single frame at index ``len(video) // 2``.
            * ``"uniform"``: ``num_sampled_frames`` indices evenly spaced over
              ``[0, len(video))`` (end excluded).
            * ``"random"``: ``num_sampled_frames`` indices evenly spaced over a
              randomly placed window of
              ``min(len(video), (num_sampled_frames - 1) * sample_stride + 1)``
              frames.
        num_sampled_frames: Number of frames to sample. Ignored by ``"mid"``;
            must be set to a non-negative value for the other methods.
        sample_stride: Distance between sampled frames, used by ``"random"``
            only. Must not be negative when more than one frame is sampled.

    Returns:
        A tuple ``(frame_indices, frames)`` where ``frame_indices`` is a list
        of plain Python ints and ``frames`` the matching list of PIL images.

    Raises:
        ValueError: If ``sample_method`` is unknown, or if the sampling
            parameters are invalid for the chosen method.
    """
    # Validate before touching the video so bad arguments fail fast and clearly.
    if sample_method not in ("mid", "uniform", "random"):
        raise ValueError("The sample_method must be mid, uniform or random.")
    if sample_method != "mid" and num_sampled_frames < 0:
        raise ValueError(
            f"The num_sampled_frames must be non-negative for the {sample_method} sample_method, "
            f"got {num_sampled_frames}."
        )
    if sample_method == "random" and num_sampled_frames > 1 and sample_stride < 0:
        # A negative stride would give a non-positive clip length and therefore
        # out-of-range (or meaningless) frame indices.
        raise ValueError(
            f"The sample_stride must be non-negative for the random sample_method, got {sample_stride}."
        )

    with video_reader(video_path, num_threads=2) as vr:
        num_frames = len(vr)
        if sample_method == "mid":
            sampled_frame_idx_list = [num_frames // 2]
        elif sample_method == "uniform":
            # .tolist() converts numpy integers to Python ints (matches List[int]).
            sampled_frame_idx_list = np.linspace(
                0, num_frames, num_sampled_frames, endpoint=False, dtype=int
            ).tolist()
        else:
            # The window is clamped to the video length, then placed at random.
            clip_length = min(num_frames, (num_sampled_frames - 1) * sample_stride + 1)
            start_idx = random.randint(0, num_frames - clip_length)
            sampled_frame_idx_list = np.linspace(
                start_idx, start_idx + clip_length - 1, num_sampled_frames, dtype=int
            ).tolist()

        frame_arrays = vr.get_batch(sampled_frame_idx_list).asnumpy()
        sampled_frame_list = [Image.fromarray(frame) for frame in frame_arrays]

        return sampled_frame_idx_list, sampled_frame_list

def download_video(video_url: str, save_path: str) -> bool:
    """Download ``video_url`` to ``save_path``.

    Any exception raised along the way is printed together with the URL and
    reported as a failure instead of being propagated.

    Returns:
        ``True`` if ``save_path`` is an existing file after the download,
        ``False`` otherwise (including when an exception occurred).
    """
    try:
        request.urlretrieve(video_url, save_path)
        succeeded = os.path.isfile(save_path)
    except Exception as err:
        print(err, video_url)
        succeeded = False
    return succeeded