Update preprocessing.py
Browse files- preprocessing.py +32 -6
preprocessing.py
CHANGED
@@ -17,25 +17,51 @@ def read_video(video_base64, num_frames=24, target_size=(224, 224)):
|
|
17 |
frames.append(frame.to_ndarray(format="rgb24").astype(np.uint8))
|
18 |
|
19 |
sampled_frames = sample_frames(frames, num_frames)
|
20 |
-
processed_frames =
|
|
|
21 |
return processed_frames
|
22 |
|
23 |
def sample_frames(frames, num_frames):
|
24 |
total_frames = len(frames)
|
|
|
25 |
if total_frames <= num_frames:
|
|
|
26 |
if total_frames < num_frames:
|
27 |
padding = [np.zeros_like(frames[0]) for _ in range(num_frames - total_frames)]
|
28 |
-
|
29 |
else:
|
30 |
indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
|
31 |
-
|
32 |
-
return np.array(frames)
|
33 |
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
transform = transforms.Compose([
|
36 |
transforms.ToPILImage(),
|
37 |
transforms.Resize(target_size),
|
38 |
transforms.ToTensor()
|
39 |
])
|
40 |
processed_frames = [transform(frame) for frame in frames]
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
frames.append(frame.to_ndarray(format="rgb24").astype(np.uint8))
|
18 |
|
19 |
sampled_frames = sample_frames(frames, num_frames)
|
20 |
+
processed_frames = pad_and_resize(sampled_frames, target_size)
|
21 |
+
processed_frames = processed_frames.permute(1, 0, 2, 3) # (T, C, H, W) -> (C, T, H, W)
|
22 |
return processed_frames
|
23 |
|
24 |
def sample_frames(frames, num_frames):
    """Return exactly ``num_frames`` frames as a numpy array.

    - Fewer frames than requested: keep them all and pad the tail with
      black (all-zero) frames of the same shape/dtype as the first frame.
    - Exactly ``num_frames``: returned unchanged.
    - More frames than requested: uniformly sample ``num_frames`` indices
      across the clip (both endpoints included).

    Args:
        frames: non-empty sequence of per-frame ndarrays (H, W, C), all the
            same shape.
        num_frames: desired number of output frames.

    Returns:
        np.ndarray of shape (num_frames, H, W, C).

    Raises:
        ValueError: if ``frames`` is empty (padding shape would be unknown).
    """
    total_frames = len(frames)
    if total_frames == 0:
        # zeros_like(frames[0]) below would raise an opaque IndexError;
        # fail early with a clear message instead.
        raise ValueError("frames must contain at least one frame")
    if total_frames < num_frames:
        # Copy so the caller's list is never mutated by the padding below.
        sampled_frames = list(frames)
        padding = [np.zeros_like(frames[0]) for _ in range(num_frames - total_frames)]
        sampled_frames.extend(padding)
    elif total_frames == num_frames:
        sampled_frames = list(frames)
    else:
        # Evenly spaced indices over [0, total_frames - 1], truncated to int.
        indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
        sampled_frames = [frames[i] for i in indices]
    return np.array(sampled_frames)
|
49 |
+
def pad_and_resize(frames, target_size):
    """Resize each frame and stack them into a single tensor.

    Each frame is converted to a PIL image, resized to ``target_size``,
    and converted to a float tensor in [0, 1] with channels first.

    Args:
        frames: iterable of uint8 ndarrays (H, W, C), e.g. the output of
            ``sample_frames`` — TODO confirm dtype; ToPILImage expects uint8
            HWC or a float tensor.
        target_size: (height, width) passed to ``transforms.Resize``.

    Returns:
        torch.Tensor of shape (T, C, H, W); the caller (``read_video``)
        permutes this to (C, T, H, W).
    """
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),
        transforms.ToTensor()  # HWC uint8 -> CHW float32 in [0, 1]
    ])
    processed_frames = [transform(frame) for frame in frames]
    # stack adds the time dimension: list of (C, H, W) -> (T, C, H, W).
    return torch.stack(processed_frames)