donghuna committed
Commit 69fee66 · verified · 1 Parent(s): 0972a44

Update preprocessing.py

Files changed (1):
  1. preprocessing.py +32 -6
preprocessing.py CHANGED
@@ -17,25 +17,51 @@ def read_video(video_base64, num_frames=24, target_size=(224, 224)):
         frames.append(frame.to_ndarray(format="rgb24").astype(np.uint8))
 
     sampled_frames = sample_frames(frames, num_frames)
-    processed_frames = preprocess_frames(sampled_frames, target_size)
+    processed_frames = pad_and_resize(sampled_frames, target_size)
+    processed_frames = processed_frames.permute(1, 0, 2, 3)  # (T, C, H, W) -> (C, T, H, W)
     return processed_frames
 
 def sample_frames(frames, num_frames):
     total_frames = len(frames)
+    sampled_frames = list(frames)
     if total_frames <= num_frames:
+        # sampled_frames = frames
         if total_frames < num_frames:
             padding = [np.zeros_like(frames[0]) for _ in range(num_frames - total_frames)]
-            frames.extend(padding)
+            sampled_frames.extend(padding)
     else:
         indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
-        frames = [frames[i] for i in indices]
-    return np.array(frames)
+        sampled_frames = [frames[i] for i in indices]
 
-def preprocess_frames(frames, target_size):
+    return np.array(sampled_frames)
+
+
+    # total_frames = len(frames)
+    # if total_frames <= num_frames:
+    #     if total_frames < num_frames:
+    #         padding = [np.zeros_like(frames[0]) for _ in range(num_frames - total_frames)]
+    #         frames.extend(padding)
+    # else:
+    #     indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
+    #     frames = [frames[i] for i in indices]
+    # return np.array(frames)
+
+def pad_and_resize(frames, target_size):
     transform = transforms.Compose([
         transforms.ToPILImage(),
         transforms.Resize(target_size),
         transforms.ToTensor()
     ])
     processed_frames = [transform(frame) for frame in frames]
-    return torch.stack(processed_frames).permute(1, 0, 2, 3).numpy()  # (T, C, H, W) -> (C, T, H, W)
+
+    return torch.stack(processed_frames)
+
+
+# def pad_and_resize(frames, target_size):
+#     transform = transforms.Compose([
+#         transforms.ToPILImage(),
+#         transforms.Resize(target_size),
+#         transforms.ToTensor()
+#     ])
+#     processed_frames = [transform(frame) for frame in frames]
+#     return torch.stack(processed_frames).permute(1, 0, 2, 3)  # (T, C, H, W) -> (C, T, H, W)
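
As a sanity check on the refactor, the sketch below (not part of the commit; it assumes numpy, torch, and torchvision are installed and uses made-up dummy frames) inlines the two updated helpers and exercises exactly what the diff changes: sample_frames no longer mutates the caller's frames list when it pads, pad_and_resize now returns a torch tensor rather than a numpy array, and the (T, C, H, W) -> (C, T, H, W) permute happens in read_video.

# Minimal sketch of the post-commit behavior; dummy data only.
import numpy as np
import torch
from torchvision import transforms

def sample_frames(frames, num_frames):
    total_frames = len(frames)
    sampled_frames = list(frames)  # copy, so the caller's list is not mutated
    if total_frames <= num_frames:
        if total_frames < num_frames:
            padding = [np.zeros_like(frames[0]) for _ in range(num_frames - total_frames)]
            sampled_frames.extend(padding)  # pad with black frames up to num_frames
    else:
        indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
        sampled_frames = [frames[i] for i in indices]  # uniform temporal subsample
    return np.array(sampled_frames)

def pad_and_resize(frames, target_size):
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),
        transforms.ToTensor(),
    ])
    return torch.stack([transform(frame) for frame in frames])  # (T, C, H, W)

# 10 dummy RGB frames, fewer than the 24 requested, so the padding path runs.
frames = [np.random.randint(0, 256, (48, 64, 3), dtype=np.uint8) for _ in range(10)]
sampled = sample_frames(frames, 24)
assert len(frames) == 10                    # pre-commit code grew this to 24 via frames.extend(padding)
assert sampled.shape == (24, 48, 64, 3)

clip = pad_and_resize(sampled, (224, 224))
assert clip.shape == (24, 3, 224, 224)      # (T, C, H, W)
clip = clip.permute(1, 0, 2, 3)             # as read_video now does
assert clip.shape == (3, 24, 224, 224)      # (C, T, H, W)

Moving the permute out of the transform helper keeps pad_and_resize a pure per-frame resize-and-stack step, and returning a tensor (the old code appended .numpy()) is what allows read_video to call .permute on the result.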