yixin1121 committed on
Commit
7dc23ce
·
verified ·
1 Parent(s): 3a81605

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. VideoLoader.py +41 -10
VideoLoader.py CHANGED
@@ -3,6 +3,7 @@ import torch as th
3
  import os
4
  import numpy as np
5
  from decord import VideoReader, cpu
 
6
 
7
 
8
  class Normalize(object):
@@ -44,11 +45,11 @@ class VideoLoader:
44
  self.max_feats = 10
45
  self.features_dim = 768
46
 
47
- def _get_video_dim(self, video_path):
48
- vr = VideoReader(video_path, ctx=cpu(0))
49
- height, width, _ = vr[0].shape
50
- frame_rate = vr.get_avg_fps()
51
- return height, width, frame_rate
52
 
53
  def _get_output_dim(self, h, w):
54
  if isinstance(self.size, tuple) and len(self.size) == 2:
@@ -58,6 +59,19 @@ class VideoLoader:
58
  else:
59
  return self.size, int(w * self.size / h)
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def _getvideo(self, video_path):
62
 
63
  if os.path.isfile(video_path):
@@ -78,15 +92,31 @@ class VideoLoader:
78
  }
79
  height, width = self._get_output_dim(h, w)
80
  # resize ##
81
- vr = VideoReader(video_path, ctx=cpu(0))
82
- video = vr.get_batch(range(0, len(vr), int(fr))).asnumpy()
83
- video = np.array([cv2.resize(frame, (width, height)) for frame in video])
84
- try:
85
 
 
 
 
 
 
 
86
  if self.centercrop:
87
  x = int((width - self.size) / 2.0)
88
  y = int((height - self.size) / 2.0)
89
- video = video[:, y:y+self.size, x:x+self.size, :]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  except:
92
  print("ffmpeg error at: {}".format(video_path))
@@ -96,6 +126,7 @@ class VideoLoader:
96
  }
97
  if self.centercrop and isinstance(self.size, int):
98
  height, width = self.size, self.size
 
99
  video = th.from_numpy(video.astype("float32"))
100
  video = video.permute(0, 3, 1, 2) # t,c,h,w
101
  else:
 
3
  import os
4
  import numpy as np
5
  from decord import VideoReader, cpu
6
+ import ffmpeg
7
 
8
 
9
  class Normalize(object):
 
45
  self.max_feats = 10
46
  self.features_dim = 768
47
 
48
+ # def _get_video_dim(self, video_path):
49
+ # vr = VideoReader(video_path, ctx=cpu(0))
50
+ # height, width, _ = vr[0].shape
51
+ # frame_rate = vr.get_avg_fps()
52
+ # return height, width, frame_rate
53
 
54
  def _get_output_dim(self, h, w):
55
  if isinstance(self.size, tuple) and len(self.size) == 2:
 
59
  else:
60
  return self.size, int(w * self.size / h)
61
 
62
def _get_video_dim(self, video_path):
    """Probe a video file and return its dimensions and average frame rate.

    Uses ``ffmpeg.probe`` to read the container metadata and takes the
    first stream whose ``codec_type`` is ``"video"``.

    Args:
        video_path: Path of the video file handed to ``ffmpeg.probe``.

    Returns:
        A ``(height, width, frame_rate)`` tuple, where ``height`` and
        ``width`` are ints and ``frame_rate`` is the float obtained by
        dividing the two halves of the stream's ``avg_frame_rate``.

    Raises:
        ValueError: If the probe result contains no video stream.
        ZeroDivisionError: If ``avg_frame_rate`` has a zero denominator
            (ffprobe can report ``"0/0"``).
    """
    probe = ffmpeg.probe(video_path)
    video_stream = next(
        (
            stream
            for stream in probe["streams"]
            # .get() so entries without a codec_type are skipped rather
            # than aborting the search with a KeyError.
            if stream.get("codec_type") == "video"
        ),
        None,
    )
    if video_stream is None:
        # Without this guard the subscripts below would fail with an
        # opaque "'NoneType' object is not subscriptable" TypeError.
        raise ValueError("no video stream found in: {}".format(video_path))

    width = int(video_stream["width"])
    height = int(video_stream["height"])
    # avg_frame_rate is a "numerator/denominator" string, e.g. "30000/1001".
    num, denom = video_stream["avg_frame_rate"].split("/")
    frame_rate = int(num) / int(denom)
    return height, width, frame_rate
73
+
74
+
75
  def _getvideo(self, video_path):
76
 
77
  if os.path.isfile(video_path):
 
92
  }
93
  height, width = self._get_output_dim(h, w)
94
  # resize ##
 
 
 
 
95
 
96
+ try:
97
+ cmd = (
98
+ ffmpeg.input(video_path)
99
+ .filter("fps", fps=self.framerate)
100
+ .filter("scale", width, height)
101
+ )
102
  if self.centercrop:
103
  x = int((width - self.size) / 2.0)
104
  y = int((height - self.size) / 2.0)
105
+ cmd = cmd.crop(x, y, self.size, self.size)
106
+ out, _ = cmd.output("pipe:", format="rawvideo", pix_fmt="rgb24").run(
107
+ capture_stdout=True, quiet=True
108
+ )
109
+
110
+ # try:
111
+ # vr = VideoReader(video_path, ctx=cpu(0))
112
+ # video = vr.get_batch(range(0, len(vr), int(fr))).asnumpy()
113
+ # video = np.array([cv2.resize(frame, (width, height)) for frame in video])
114
+
115
+
116
+ # if self.centercrop:
117
+ # x = int((width - self.size) / 2.0)
118
+ # y = int((height - self.size) / 2.0)
119
+ # video = video[:, y:y+self.size, x:x+self.size, :]
120
 
121
  except:
122
  print("ffmpeg error at: {}".format(video_path))
 
126
  }
127
  if self.centercrop and isinstance(self.size, int):
128
  height, width = self.size, self.size
129
+ video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])
130
  video = th.from_numpy(video.astype("float32"))
131
  video = video.permute(0, 3, 1, 2) # t,c,h,w
132
  else: