Spaces:

yixin1121
/

T-MoENet

Runtime error

App Files Files Community

yixin1121 committed on Jul 14, 2024

Commit

7dc23ce

verified ·

1 Parent(s): 3a81605

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

VideoLoader.py +41 -10

VideoLoader.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch as th
 import os
 import numpy as np
 from decord import VideoReader, cpu
 class Normalize(object):
@@ -44,11 +45,11 @@ class VideoLoader:
         self.max_feats = 10
         self.features_dim = 768
-    def _get_video_dim(self, video_path):
-        vr = VideoReader(video_path, ctx=cpu(0))
-        height, width, _ = vr[0].shape
-        frame_rate = vr.get_avg_fps()
-        return height, width, frame_rate
     def _get_output_dim(self, h, w):
         if isinstance(self.size, tuple) and len(self.size) == 2:
@@ -58,6 +59,19 @@ class VideoLoader:
         else:
             return self.size, int(w * self.size / h)
     def _getvideo(self, video_path):
         if os.path.isfile(video_path):
@@ -78,15 +92,31 @@ class VideoLoader:
                 }
             height, width = self._get_output_dim(h, w)
             # resize ##
-            vr = VideoReader(video_path, ctx=cpu(0))
-            video = vr.get_batch(range(0, len(vr), int(fr))).asnumpy()
-            video = np.array([cv2.resize(frame, (width, height)) for frame in video])
-            try:
                 if self.centercrop:
                     x = int((width - self.size) / 2.0)
                     y = int((height - self.size) / 2.0)
-                    video = video[:, y:y+self.size, x:x+self.size, :]
             except:
                 print("ffmpeg error at: {}".format(video_path))
@@ -96,6 +126,7 @@ class VideoLoader:
                 }
             if self.centercrop and isinstance(self.size, int):
                 height, width = self.size, self.size
             video = th.from_numpy(video.astype("float32"))
             video = video.permute(0, 3, 1, 2) # t,c,h,w
         else:

 import os
 import numpy as np
 from decord import VideoReader, cpu
+import ffmpeg
 class Normalize(object):
         self.max_feats = 10
         self.features_dim = 768
+    # def _get_video_dim(self, video_path):
+    #     vr = VideoReader(video_path, ctx=cpu(0))
+    #     height, width, _ = vr[0].shape
+    #     frame_rate = vr.get_avg_fps()
+    #     return height, width, frame_rate
     def _get_output_dim(self, h, w):
         if isinstance(self.size, tuple) and len(self.size) == 2:
         else:
             return self.size, int(w * self.size / h)
+    def _get_video_dim(self, video_path):
+        probe = ffmpeg.probe(video_path)
+        video_stream = next(
+            (stream for stream in probe["streams"] if stream["codec_type"] == "video"),
+            None,
+        )
+        width = int(video_stream["width"])
+        height = int(video_stream["height"])
+        num, denum = video_stream["avg_frame_rate"].split("/")
+        frame_rate = int(num) / int(denum)
+        return height, width, frame_rate
     def _getvideo(self, video_path):
         if os.path.isfile(video_path):
                 }
             height, width = self._get_output_dim(h, w)
             # resize ##
+            try:
+                cmd = (
+                    ffmpeg.input(video_path)
+                    .filter("fps", fps=self.framerate)
+                    .filter("scale", width, height)
+                )
                 if self.centercrop:
                     x = int((width - self.size) / 2.0)
                     y = int((height - self.size) / 2.0)
+                    cmd = cmd.crop(x, y, self.size, self.size)
+                out, _ = cmd.output("pipe:", format="rawvideo", pix_fmt="rgb24").run(
+                    capture_stdout=True, quiet=True
+                )
+            # try:
+            #     vr = VideoReader(video_path, ctx=cpu(0))
+            #     video = vr.get_batch(range(0, len(vr), int(fr))).asnumpy()
+            #     video = np.array([cv2.resize(frame, (width, height)) for frame in video])
+            #     if self.centercrop:
+            #         x = int((width - self.size) / 2.0)
+            #         y = int((height - self.size) / 2.0)
+            #         video = video[:, y:y+self.size, x:x+self.size, :]
             except:
                 print("ffmpeg error at: {}".format(video_path))
                 }
             if self.centercrop and isinstance(self.size, int):
                 height, width = self.size, self.size
+            video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])
             video = th.from_numpy(video.astype("float32"))
             video = video.permute(0, 3, 1, 2) # t,c,h,w
         else: