headscratchertm committed on
Commit
816f401
·
1 Parent(s): ddab39e

added Unet arch from original Repo

Browse files
Files changed (4) hide show
  1. frames.py +17 -5
  2. info.txt +7 -0
  3. main.py +9 -0
  4. model.py +361 -0
frames.py CHANGED
@@ -1,16 +1,28 @@
1
  import cv2
2
  import os
def extract_frames(url_path, output_dir, max_frames=10):
    """
    Decode a video and save its leading frames as PNG images.

    Frames are written to `output_dir` as `0.png`, `1.png`, ... in decode
    order. The directory is created if it does not exist.

    Parameters
    ----------
        url_path : str
            path (or URL) handed to `cv2.VideoCapture`.
        output_dir : str
            directory that receives the PNG files.
        max_frames : int or None
            maximum number of frames to save. Defaults to 10; pass `None`
            to save every frame that can be read.

    Returns
    -------
        None
    """
    os.makedirs(output_dir, exist_ok=True)
    frame_count = 0
    cap = cv2.VideoCapture(url_path)
    try:
        # If the capture failed to open, isOpened() is False and nothing is
        # written.
        while cap.isOpened() and (max_frames is None or frame_count < max_frames):
            ret, frame = cap.read()
            if not ret:
                # End of stream or decode failure.
                break
            frame_name = f"{frame_count}.png"
            cv2.imwrite(os.path.join(output_dir, frame_name), frame)
            frame_count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()
15
# NOTE: this call runs at import time and uses a machine-specific path.
extract_frames(
    url_path="C:/Users/BRIDGES/Downloads/Video1.mp4",
    output_dir="output",
)
# this is a test change to merge later
 
 
 
 
1
  import cv2
2
  import os
def extract_frames(url_path, output_dir):
    """
    Split a video into PNG frames that act as the initial feed for the
    SuperSloMo model.

    Every frame that can be read is written to `output_dir` as `0.png`,
    `1.png`, ... in decode order. The directory is created if it does not
    exist.

    Parameters
    ----------
        url_path : str
            path (or URL) handed to `cv2.VideoCapture`.
        output_dir : str
            directory that receives the PNG files.

    Returns
    -------
        None
    """
    os.makedirs(output_dir, exist_ok=True)
    frame_count = 0
    cap = cv2.VideoCapture(url_path)
    try:
        # If the capture failed to open, isOpened() is False and nothing is
        # written.
        while cap.isOpened():
            ret, frame = cap.read()  # frame is a numpy array
            if not ret:
                # End of stream or decode failure.
                break
            frame_name = f"{frame_count}.png"
            frame_count += 1
            cv2.imwrite(os.path.join(output_dir, frame_name), frame)
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()
24
def downsample(video_path, output_dir, target_fps):
    """
    Placeholder for frame-rate downsampling; not implemented yet.

    Accepts a video path, an output directory and a target fps, performs
    no work and returns None.
    """
    return None
26
if __name__ == "__main__":
    # True only when this file is executed as a script; importing the module
    # does not trigger the extraction.
    extract_frames(url_path="Test.mp4", output_dir="output")
info.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ we need to decide how many frames our output video should have
2
+ For example, assume the input video is 1 minute (60 s) long at 30 fps and the target output is 90 fps.
3
+ Interpolation factor: k = fps_output / fps_input
4
+ k=90/30
5
+ k=3
6
+ # The output video will have T x fps_output = 60 x 90 = 5400 frames, where T is the total duration in seconds.
7
+
main.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
def solve():
    """
    Load the `SuperSloMo.ckpt` checkpoint from the working directory and
    print it.

    `torch.load` returns whatever object was saved. If that object is an
    `nn.Module` it is switched to evaluation mode before printing; any other
    object (for example a dict of state dicts) is printed as-is.

    Returns
    -------
        None
    """
    # map_location="cpu" lets a checkpoint saved from CUDA tensors load on a
    # machine without a GPU.
    checkpoint = torch.load("SuperSloMo.ckpt", map_location="cpu")
    # Only modules have eval(); calling it unconditionally raises
    # AttributeError for dict-style checkpoints.
    if isinstance(checkpoint, torch.nn.Module):
        checkpoint.eval()
    print(checkpoint)
6
def main():
    """Script entry point: delegate to `solve()`."""
    solve()


if __name__ == "__main__":
    # Run only when executed as a script, not on import.
    main()
model.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchvision
3
+ import torchvision.transforms as transforms
4
+ import torch.optim as optim
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ import numpy as np
8
+
9
+
10
class down(nn.Module):
    """
    Encoder block used by the UNet class. Layer order:

    Average Pooling --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    ...

    Methods
    -------
        forward(x)
            Returns output tensor after passing input `x` through the block.
    """

    def __init__(self, inChannels, outChannels, filterSize):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels of the first convolution.
            outChannels : int
                number of output channels of the first convolution; also the
                input and output channel count of the second convolution.
            filterSize : int
                side length N of the N x N convolution filter.
        """
        super().__init__()
        # Both convolutions share the same kernel size and padding.
        pad = int((filterSize - 1) / 2)
        self.conv1 = nn.Conv2d(inChannels, outChannels, filterSize, stride=1, padding=pad)
        self.conv2 = nn.Conv2d(outChannels, outChannels, filterSize, stride=1, padding=pad)

    def forward(self, x):
        """
        Pool the input by 2 and apply the two convolution stages.

        Parameters
        ----------
            x : tensor
                input to the NN block.

        Returns
        -------
            tensor
                output of the NN block.
        """
        # 2 x 2 average pooling.
        pooled = F.avg_pool2d(x, 2)
        # First convolution + Leaky ReLU.
        hidden = F.leaky_relu(self.conv1(pooled), negative_slope=0.1)
        # Second convolution + Leaky ReLU.
        return F.leaky_relu(self.conv2(hidden), negative_slope=0.1)
73
+
74
class up(nn.Module):
    """
    Decoder block used by the UNet class. Layer order:

    Bilinear interpolation --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    The second convolution runs on the channel-wise concatenation of the
    upsampled features and a skip connection.

    ...

    Methods
    -------
        forward(x, skpCn)
            Returns output tensor after passing input `x` and skip connection
            `skpCn` through the block.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels of the first convolution.
            outChannels : int
                number of output channels of the first convolution; also sets
                the channel counts of the second convolution.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1)
        # (2 * outChannels) accommodates the concatenated skip connection.
        self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1)

    def forward(self, x, skpCn):
        """
        Upsample `x`, convolve, merge with `skpCn` and convolve again.

        Parameters
        ----------
            x : tensor
                input to the NN block.
            skpCn : tensor
                skip connection input to the NN block; must have
                `outChannels` channels.

        Returns
        -------
            tensor
                output of the NN block, with the spatial size of `skpCn`.
        """
        # Upsample to the skip connection's spatial size. This equals a x2
        # bilinear upsampling when `skpCn` is exactly twice as large as `x`.
        # It also keeps torch.cat below valid when pooling an odd-sized
        # feature map dropped a row/column (e.g. 5 -> 2 -> 4 != 5).
        x = F.interpolate(x, size=skpCn.shape[-2:], mode='bilinear', align_corners=False)
        # Convolution + Leaky ReLU
        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        # Convolution + Leaky ReLU on the concatenation of `x` and `skpCn`
        x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)
        return x
136
+
137
+
138
+
139
class UNet(nn.Module):
    """
    UNet-like architecture built from `down` and `up` blocks, as specified by
    the Super SloMo paper.

    ...

    Methods
    -------
        forward(x)
            Returns output tensor after passing input `x` through the network.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels for the UNet.
            outChannels : int
                number of output channels for the UNet.
        """
        super().__init__()
        # Stem: two 7 x 7 convolutions at full resolution.
        self.conv1 = nn.Conv2d(inChannels, 32, 7, stride=1, padding=3)
        self.conv2 = nn.Conv2d(32, 32, 7, stride=1, padding=3)
        # Encoder stages down1..down5: (in channels, out channels, filter size).
        encoder_spec = ((32, 64, 5), (64, 128, 3), (128, 256, 3), (256, 512, 3), (512, 512, 3))
        for stage, (c_in, c_out, k) in enumerate(encoder_spec, start=1):
            setattr(self, f"down{stage}", down(c_in, c_out, k))
        # Decoder stages up1..up5: (in channels, out channels).
        decoder_spec = ((512, 512), (512, 256), (256, 128), (128, 64), (64, 32))
        for stage, (c_in, c_out) in enumerate(decoder_spec, start=1):
            setattr(self, f"up{stage}", up(c_in, c_out))
        # Output head.
        self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1)

    def forward(self, x):
        """
        Run the encoder, then the decoder with skip connections.

        Parameters
        ----------
            x : tensor
                input to the UNet.

        Returns
        -------
            tensor
                output of the UNet.
        """
        stem = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        # skips collects the stem output followed by the outputs of down1..down4.
        skips = [F.leaky_relu(self.conv2(stem), negative_slope=0.1)]
        for encoder in (self.down1, self.down2, self.down3, self.down4):
            skips.append(encoder(skips[-1]))
        out = self.down5(skips[-1])
        # Each decoder stage consumes the most recent unused skip connection.
        for decoder in (self.up1, self.up2, self.up3, self.up4, self.up5):
            out = decoder(out, skips.pop())
        return F.leaky_relu(self.conv3(out), negative_slope=0.1)
211
+
212
+
213
class backWarp(nn.Module):
    """
    A class for creating a backwarping object.

    Given the optical flow from frame I0 to I1 (F_0_1) and frame I1, it
    generates I0 = backwarp(I1, F_0_1).

    ...

    Methods
    -------
        forward(img, flow)
            Returns output tensor after passing input `img` and `flow` to the
            backwarping block.
    """

    def __init__(self, W, H, device):
        """
        Parameters
        ----------
            W : int
                width of the image.
            H : int
                height of the image.
            device : device
                computation device (cpu/cuda).
        """
        super().__init__()
        # Pixel-coordinate grids, each of shape (H, W).
        gridX, gridY = np.meshgrid(np.arange(W), np.arange(H))
        self.W = W
        self.H = H
        self.gridX = torch.tensor(gridX, requires_grad=False, device=device)
        self.gridY = torch.tensor(gridY, requires_grad=False, device=device)

    def forward(self, img, flow):
        """
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.
        I0 = backwarp(I1, F_0_1)

        Parameters
        ----------
            img : tensor
                frame I1.
            flow : tensor
                optical flow from I0 to I1: F_0_1. Channel 0 is read as the
                horizontal flow and channel 1 as the vertical flow.

        Returns
        -------
            tensor
                frame I0.
        """
        # Extract horizontal and vertical flows.
        u = flow[:, 0, :, :]
        v = flow[:, 1, :, :]
        # Absolute sampling positions = pixel grid + flow.
        x = self.gridX.unsqueeze(0).expand_as(u).float() + u
        y = self.gridY.unsqueeze(0).expand_as(v).float() + v
        # Rescale to the [-1, 1] range expected by grid_sample.
        x = 2 * (x / self.W - 0.5)
        y = 2 * (y / self.H - 0.5)
        # Stack X and Y as the last dimension.
        grid = torch.stack((x, y), dim=3)
        # Sample pixels using bilinear interpolation. align_corners is passed
        # explicitly: grid_sample's default changed from True to False in
        # PyTorch 1.3, and leaving it unset silently changes the sampling
        # positions (and emits a warning) on newer versions.
        # NOTE(review): True is assumed to be the behaviour any pretrained
        # weights used with this module were trained with; confirm.
        imgOut = torch.nn.functional.grid_sample(img, grid, align_corners=True)
        return imgOut
286
+
287
+
288
# `t` values for the 7 intermediate frames between reference frames I0 and I1.
t = np.linspace(0.125, 0.875, 7)


def getFlowCoeff(indices, device):
    """
    Gets flow coefficients used for calculating intermediate optical
    flows from optical flows between I0 and I1: F_0_1 and F_1_0.

    F_t_0 = C00 x F_0_1 + C01 x F_1_0
    F_t_1 = C10 x F_0_1 + C11 x F_1_0

    where,
    C00 = -(1 - t) x t
    C01 = t x t
    C10 = (1 - t) x (1 - t)
    C11 = -t x (1 - t)

    Parameters
    ----------
        indices : tensor
            1-D tensor of indices (into the module-level `t` array)
            corresponding to the intermediate frame positions of all samples
            in the batch.
        device : device
            computation device (cpu/cuda).

    Returns
    -------
        tuple of tensor
            coefficients C00, C01, C10, C11, each a float tensor of shape
            (len(indices), 1, 1, 1) on `device`.
    """
    # Convert the indices tensor to a numpy array. `.cpu()` is needed because
    # `.numpy()` raises for tensors that live on a CUDA device.
    ind = indices.detach().cpu().numpy()
    tau = t[ind]
    C00 = -(1 - tau) * tau
    C01 = tau * tau
    C10 = (1 - tau) * (1 - tau)
    # C11 = -t x (1 - t) has the same value as C00.
    C11 = C00
    # Reshape each coefficient vector to (batch, 1, 1, 1) on `device`.
    return tuple(
        torch.Tensor(coeff)[None, None, None, :].permute(3, 0, 1, 2).to(device)
        for coeff in (C00, C01, C10, C11)
    )
327
+
328
def getWarpCoeff(indices, device):
    """
    Gets coefficients used for calculating final intermediate
    frame `It_gen` from backwarped images using flows F_t_0 and F_t_1.

    It_gen = (C0 x V_t_0 x g_I_0_F_t_0 + C1 x V_t_1 x g_I_1_F_t_1) / (C0 x V_t_0 + C1 x V_t_1)

    where,
    C0 = 1 - t
    C1 = t

    V_t_0, V_t_1 --> visibility maps
    g_I_0_F_t_0, g_I_1_F_t_1 --> backwarped intermediate frames

    Parameters
    ----------
        indices : tensor
            1-D tensor of indices (into the module-level `t` array)
            corresponding to the intermediate frame positions of all samples
            in the batch.
        device : device
            computation device (cpu/cuda).

    Returns
    -------
        tuple of tensor
            coefficients C0 and C1, each a float tensor of shape
            (len(indices), 1, 1, 1) on `device`.
    """
    # Convert the indices tensor to a numpy array. `.cpu()` is needed because
    # `.numpy()` raises for tensors that live on a CUDA device.
    ind = indices.detach().cpu().numpy()
    tau = t[ind]
    C0 = 1 - tau
    C1 = tau
    # Reshape each coefficient vector to (batch, 1, 1, 1) on `device`.
    return tuple(
        torch.Tensor(coeff)[None, None, None, :].permute(3, 0, 1, 2).to(device)
        for coeff in (C0, C1)
    )