Upload folder using huggingface_hub
Browse files- VideoLoader.py +41 -10
VideoLoader.py
CHANGED
@@ -3,6 +3,7 @@ import torch as th
|
|
3 |
import os
|
4 |
import numpy as np
|
5 |
from decord import VideoReader, cpu
|
|
|
6 |
|
7 |
|
8 |
class Normalize(object):
|
@@ -44,11 +45,11 @@ class VideoLoader:
|
|
44 |
self.max_feats = 10
|
45 |
self.features_dim = 768
|
46 |
|
47 |
-
def _get_video_dim(self, video_path):
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
|
53 |
def _get_output_dim(self, h, w):
|
54 |
if isinstance(self.size, tuple) and len(self.size) == 2:
|
@@ -58,6 +59,19 @@ class VideoLoader:
|
|
58 |
else:
|
59 |
return self.size, int(w * self.size / h)
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
def _getvideo(self, video_path):
|
62 |
|
63 |
if os.path.isfile(video_path):
|
@@ -78,15 +92,31 @@ class VideoLoader:
|
|
78 |
}
|
79 |
height, width = self._get_output_dim(h, w)
|
80 |
# resize ##
|
81 |
-
vr = VideoReader(video_path, ctx=cpu(0))
|
82 |
-
video = vr.get_batch(range(0, len(vr), int(fr))).asnumpy()
|
83 |
-
video = np.array([cv2.resize(frame, (width, height)) for frame in video])
|
84 |
-
try:
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
if self.centercrop:
|
87 |
x = int((width - self.size) / 2.0)
|
88 |
y = int((height - self.size) / 2.0)
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
except:
|
92 |
print("ffmpeg error at: {}".format(video_path))
|
@@ -96,6 +126,7 @@ class VideoLoader:
|
|
96 |
}
|
97 |
if self.centercrop and isinstance(self.size, int):
|
98 |
height, width = self.size, self.size
|
|
|
99 |
video = th.from_numpy(video.astype("float32"))
|
100 |
video = video.permute(0, 3, 1, 2) # t,c,h,w
|
101 |
else:
|
|
|
3 |
import os
|
4 |
import numpy as np
|
5 |
from decord import VideoReader, cpu
|
6 |
+
import ffmpeg
|
7 |
|
8 |
|
9 |
class Normalize(object):
|
|
|
45 |
self.max_feats = 10
|
46 |
self.features_dim = 768
|
47 |
|
48 |
+
# def _get_video_dim(self, video_path):
|
49 |
+
# vr = VideoReader(video_path, ctx=cpu(0))
|
50 |
+
# height, width, _ = vr[0].shape
|
51 |
+
# frame_rate = vr.get_avg_fps()
|
52 |
+
# return height, width, frame_rate
|
53 |
|
54 |
def _get_output_dim(self, h, w):
|
55 |
if isinstance(self.size, tuple) and len(self.size) == 2:
|
|
|
59 |
else:
|
60 |
return self.size, int(w * self.size / h)
|
61 |
|
62 |
+
def _get_video_dim(self, video_path):
    """Probe a video file for its frame size and frame rate.

    Runs ``ffmpeg.probe`` on ``video_path`` and reads the first stream whose
    ``codec_type`` is ``"video"``.

    Args:
        video_path: Path of the video file handed to ``ffmpeg.probe``.

    Returns:
        A ``(height, width, frame_rate)`` tuple; ``height`` and ``width`` are
        ints, ``frame_rate`` is a float obtained from the stream's
        ``"num/den"`` frame-rate string.

    Raises:
        ValueError: If the probe result contains no video stream.
        ZeroDivisionError: If neither ``avg_frame_rate`` nor ``r_frame_rate``
            has a non-zero denominator.
    """
    probe = ffmpeg.probe(video_path)
    video_stream = next(
        (stream for stream in probe["streams"] if stream.get("codec_type") == "video"),
        None,
    )
    if video_stream is None:
        # Fail with an explicit message instead of the opaque
        # "'NoneType' object is not subscriptable" raised further down.
        raise ValueError("no video stream found in: {}".format(video_path))

    width = int(video_stream["width"])
    height = int(video_stream["height"])

    avg_rate = video_stream["avg_frame_rate"]
    num, denum = avg_rate.split("/")
    if int(denum) == 0:
        # Some containers report avg_frame_rate as "0/0"; fall back to the
        # stream's r_frame_rate when it is available.
        num, denum = video_stream.get("r_frame_rate", avg_rate).split("/")
    frame_rate = int(num) / int(denum)
    return height, width, frame_rate
|
73 |
+
|
74 |
+
|
75 |
def _getvideo(self, video_path):
|
76 |
|
77 |
if os.path.isfile(video_path):
|
|
|
92 |
}
|
93 |
height, width = self._get_output_dim(h, w)
|
94 |
# resize ##
|
|
|
|
|
|
|
|
|
95 |
|
96 |
+
try:
|
97 |
+
cmd = (
|
98 |
+
ffmpeg.input(video_path)
|
99 |
+
.filter("fps", fps=self.framerate)
|
100 |
+
.filter("scale", width, height)
|
101 |
+
)
|
102 |
if self.centercrop:
|
103 |
x = int((width - self.size) / 2.0)
|
104 |
y = int((height - self.size) / 2.0)
|
105 |
+
cmd = cmd.crop(x, y, self.size, self.size)
|
106 |
+
out, _ = cmd.output("pipe:", format="rawvideo", pix_fmt="rgb24").run(
|
107 |
+
capture_stdout=True, quiet=True
|
108 |
+
)
|
109 |
+
|
110 |
+
# try:
|
111 |
+
# vr = VideoReader(video_path, ctx=cpu(0))
|
112 |
+
# video = vr.get_batch(range(0, len(vr), int(fr))).asnumpy()
|
113 |
+
# video = np.array([cv2.resize(frame, (width, height)) for frame in video])
|
114 |
+
|
115 |
+
|
116 |
+
# if self.centercrop:
|
117 |
+
# x = int((width - self.size) / 2.0)
|
118 |
+
# y = int((height - self.size) / 2.0)
|
119 |
+
# video = video[:, y:y+self.size, x:x+self.size, :]
|
120 |
|
121 |
except:
|
122 |
print("ffmpeg error at: {}".format(video_path))
|
|
|
126 |
}
|
127 |
if self.centercrop and isinstance(self.size, int):
|
128 |
height, width = self.size, self.size
|
129 |
+
video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])
|
130 |
video = th.from_numpy(video.astype("float32"))
|
131 |
video = video.permute(0, 3, 1, 2) # t,c,h,w
|
132 |
else:
|