Spaces:
Running
on
Zero
Running
on
Zero
Update src/utils.py
Browse files- src/utils.py +71 -50
src/utils.py
CHANGED
@@ -1,50 +1,71 @@
|
|
1 |
-
# Importing the requirements
|
2 |
-
from PIL import Image
|
3 |
-
from decord import VideoReader, cpu
|
4 |
-
|
5 |
-
|
6 |
-
# Maximum number of frames to use
|
7 |
-
MAX_NUM_FRAMES = 64 # If CUDA OOM, set a smaller number
|
8 |
-
|
9 |
-
|
10 |
-
def
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Importing the requirements
|
2 |
+
from PIL import Image
|
3 |
+
from decord import VideoReader, cpu
|
4 |
+
|
5 |
+
|
6 |
+
# Maximum number of frames to use
|
7 |
+
MAX_NUM_FRAMES = 64 # Upper bound on the number of frames sampled from one video; lower it if CUDA runs out of memory
|
8 |
+
|
9 |
+
|
10 |
+
def encode_image(image):
    """
    Loads an image if needed and caps its longest side at 448 * 16 pixels.

    Args:
        image: Either a PIL ``Image.Image`` (used as-is), or an object that
            exposes the file location as ``image.path`` or, failing that,
            ``image.file.path``.

    Returns:
        Image.Image: The image. When it was loaded from a path it is
        converted to RGB; an ``Image.Image`` passed in directly keeps its
        mode. If the longest side exceeds the cap, a bicubic-resized copy
        with the aspect ratio preserved is returned instead.
    """
    if not isinstance(image, Image.Image):
        path = image.path if hasattr(image, "path") else image.file.path
        # convert() yields an independent in-memory image, so the underlying
        # file handle can be released as soon as the conversion is done.
        with Image.open(path) as source:
            image = source.convert("RGB")

    # Resize so that the longest side does not exceed max_size
    max_size = 448 * 16
    if max(image.size) > max_size:
        w, h = image.size
        if w > h:
            new_w = max_size
            # Clamp to 1: extreme aspect ratios would otherwise truncate to 0,
            # which resize() rejects.
            new_h = max(1, int(h * max_size / w))
        else:
            new_h = max_size
            new_w = max(1, int(w * max_size / h))
        image = image.resize((new_w, new_h), resample=Image.BICUBIC)
    return image
|
28 |
+
|
29 |
+
|
30 |
+
def encode_video(video_path):
    """
    Encodes a video file into a list of frames.

    Frames are taken roughly once per second of video (one every
    ``round(avg_fps)`` frames). If that yields more than ``MAX_NUM_FRAMES``
    indices, they are thinned out uniformly to exactly ``MAX_NUM_FRAMES``.

    Args:
        video_path (str): The path to the video file.

    Returns:
        list: A list of frames, where each frame is represented as an Image
        object that has been passed through ``encode_image``. Empty if no
        frame index was selected.
    """

    def uniform_sample(items, n):
        """
        Uniformly samples elements from a list.

        Args:
            - items (list): The input list.
            - n (int): The number of elements to sample.

        Returns:
            list: A list of sampled elements.
        """
        gap = len(items) / n
        # Pick the element at the centre of each of the n equal-width buckets
        idxs = [int(i * gap + gap / 2) for i in range(n)]
        return [items[i] for i in idxs]

    # Read the video file and sample frames
    vr = VideoReader(video_path, ctx=cpu(0))
    # Clamp the step to 1: an average FPS below 0.5 rounds to 0, and a zero
    # step makes range() raise ValueError.
    sample_fps = max(1, round(vr.get_avg_fps() / 1))  # FPS
    frame_idx = list(range(0, len(vr), sample_fps))

    # Nothing to decode
    if not frame_idx:
        return []

    # Uniformly sample frames if the number of frames is too large
    if len(frame_idx) > MAX_NUM_FRAMES:
        frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)

    # Extract frames from the video
    frames = vr.get_batch(frame_idx).asnumpy()
    frames = [Image.fromarray(v.astype("uint8")) for v in frames]
    frames = [encode_image(v) for v in frames]

    # Return video frames
    return frames
|