jpterry commited on
Commit
bc0a3bf
·
1 Parent(s): 70d6ab6

everything in app.py

Browse files
Files changed (2) hide show
  1. app.py +864 -3
  2. requirements.txt +1 -2
app.py CHANGED
@@ -7,16 +7,39 @@ import numpy as np
7
  from PIL import Image
8
  from scipy import special
9
  import sys
10
- import timm
11
  from types import SimpleNamespace
12
  # from transformers import AutoModel, pipeline
13
  from transformers import AutoModelForImageClassification
14
  import torch
 
 
 
 
15
 
16
- sys.path.insert(1, "../")
17
  # from utils import model_utils, train_utils, data_utils, run_utils
18
  # from model_utils import jason_regnet_maker, jason_efficientnet_maker
19
- from model_utils.efficientnet_config import EfficientNetConfig, EfficientNetPreTrained
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  model_path = 'chlab/'
22
  # model_path = './models/'
@@ -50,6 +73,844 @@ effnet_hparams = {61: {
50
  activation_indices = {'efficientnet': [0, 3]}
51
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def normalize_array(x: list):
54
 
55
  '''Makes array between 0 and 1'''
 
7
  from PIL import Image
8
  from scipy import special
9
  import sys
10
+ # import timm
11
  from types import SimpleNamespace
12
  # from transformers import AutoModel, pipeline
13
  from transformers import AutoModelForImageClassification
14
  import torch
15
+ from torch import Tensor, nn
16
+ from torch import Tensor
17
+ from torchvision.models._utils import _make_divisible
18
+ from torchvision.ops import StochasticDepth
19
 
20
+ # sys.path.insert(1, "../")
21
  # from utils import model_utils, train_utils, data_utils, run_utils
22
  # from model_utils import jason_regnet_maker, jason_efficientnet_maker
23
+ # from model_utils.efficientnet_config import EfficientNetConfig, EfficientNetPreTrained
24
+
25
+
26
+ from transformers import PretrainedConfig, PreTrainedModel
27
+
28
+ from typing import List
29
+ import copy
30
+ import math
31
+ import warnings
32
+ from dataclasses import dataclass
33
+ from functools import partial
34
+ import sys
35
+ from typing import Any, Callable, List, Optional, Sequence, Tuple, Union
36
+
37
+
38
+
39
+ # sys.path.insert(1, "../")
40
+ # from utils.vision_modifications import Conv2dNormActivation, SqueezeExcitation
41
+
42
+ interpolate = torch.nn.functional.interpolate
43
 
44
  model_path = 'chlab/'
45
  # model_path = './models/'
 
73
  activation_indices = {'efficientnet': [0, 3]}
74
 
75
 
76
+ ########## EfficientNet ############
77
@dataclass
class _MBConvConfig:
    """Per-stage settings shared by the MBConv and FusedMBConv configurations.

    One instance describes a whole stage; ``EfficientNet`` copies it once per
    block in that stage and hands the copy to ``block``.
    """

    # Multiplier applied to ``input_channels`` (via ``adjust_channels``) to get
    # the width of a block's expansion.
    expand_ratio: float
    # Kernel size handed to the block's spatial convolution.
    kernel: int
    # Stride of the first block of the stage.
    stride: int
    # Channels entering the first block of the stage.
    input_channels: int
    # Channels leaving every block of the stage.
    out_channels: int
    # Number of blocks in the stage.
    num_layers: int
    # Callable that builds one block from (config, sd_prob, norm_layer).
    block: Callable[..., nn.Module]

    @staticmethod
    def adjust_channels(
        channels: int, width_mult: float, min_value: Optional[int] = None
    ) -> int:
        """Scale ``channels`` by ``width_mult`` and pass it to ``_make_divisible`` with divisor 8."""
        scaled_width = channels * width_mult
        return _make_divisible(scaled_width, 8, min_value)
92
+
93
+
94
class MBConvConfig(_MBConvConfig):
    """Stage settings for MBConv blocks.

    Stores the information listed in Table 1 of the EfficientNet paper and
    Table 4 of the EfficientNetV2 paper, with the width and depth multipliers
    already applied.
    """

    def __init__(
        self,
        expand_ratio: float,
        kernel: int,
        stride: int,
        input_channels: int,
        out_channels: int,
        num_layers: int,
        width_mult: float = 1.0,
        depth_mult: float = 1.0,
        block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Scale the channel counts and depth, then store the stage settings.

        Args:
            expand_ratio: Expansion multiplier, stored unchanged.
            kernel: Kernel size, stored unchanged.
            stride: Stride, stored unchanged.
            input_channels: Base input width; scaled through ``adjust_channels``.
            out_channels: Base output width; scaled through ``adjust_channels``.
            num_layers: Base block count; scaled through ``adjust_depth``.
            width_mult: Multiplier for both channel counts.
            depth_mult: Multiplier for ``num_layers``.
            block: Block constructor; ``MBConv`` is used when ``None``.
        """
        super().__init__(
            expand_ratio=expand_ratio,
            kernel=kernel,
            stride=stride,
            input_channels=self.adjust_channels(input_channels, width_mult),
            out_channels=self.adjust_channels(out_channels, width_mult),
            num_layers=self.adjust_depth(num_layers, depth_mult),
            block=MBConv if block is None else block,
        )

    @staticmethod
    def adjust_depth(num_layers: int, depth_mult: float):
        """Return ``num_layers * depth_mult`` rounded up to an ``int``."""
        scaled_depth = math.ceil(num_layers * depth_mult)
        return int(scaled_depth)
126
+
127
+
128
class FusedMBConvConfig(_MBConvConfig):
    """Stage settings for FusedMBConv blocks (Table 4 of the EfficientNetV2 paper).

    Unlike ``MBConvConfig``, no width or depth multiplier is applied: every
    value is stored exactly as given.
    """

    def __init__(
        self,
        expand_ratio: float,
        kernel: int,
        stride: int,
        input_channels: int,
        out_channels: int,
        num_layers: int,
        block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Store the stage settings, using ``FusedMBConv`` when ``block`` is ``None``."""
        super().__init__(
            expand_ratio=expand_ratio,
            kernel=kernel,
            stride=stride,
            input_channels=input_channels,
            out_channels=out_channels,
            num_layers=num_layers,
            block=FusedMBConv if block is None else block,
        )
151
+
152
+
153
class MBConv(nn.Module):
    """Inverted-residual block: expand (1x1) -> depthwise -> squeeze-excite -> project (1x1).

    The 1x1 expansion is skipped when the expanded width equals the input
    width. A residual connection (with stochastic depth applied to the branch)
    is used only when the stride is 1 and input/output widths match.
    """

    def __init__(
        self,
        cnf: "MBConvConfig",
        stochastic_depth_prob: float,
        norm_layer: Callable[..., nn.Module],
        se_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Build the block from a stage configuration.

        Args:
            cnf: Supplies ``input_channels``, ``out_channels``, ``kernel``,
                ``stride`` and ``expand_ratio`` for this block.
            stochastic_depth_prob: Probability handed to ``StochasticDepth``
                (mode ``"row"``).
            norm_layer: Normalisation constructor passed to every
                ``Conv2dNormActivation``.
            se_layer: Constructor for the squeeze-and-excitation module, called
                as ``se_layer(expanded_channels, squeeze_channels, activation=...)``.
                ``None`` (the default) selects ``SqueezeExcitation``.

        Raises:
            ValueError: If ``cnf.stride`` is not 1 or 2.
        """
        super().__init__()

        # The default is resolved here instead of in the signature: a signature
        # default is evaluated when the class body runs, and SqueezeExcitation
        # is defined further down in this module, so naming it in the signature
        # raises NameError at import time.
        if se_layer is None:
            se_layer = SqueezeExcitation

        if not (1 <= cnf.stride <= 2):
            raise ValueError("illegal stride value")

        self.use_res_connect = (
            cnf.stride == 1 and cnf.input_channels == cnf.out_channels
        )

        layers: List[nn.Module] = []
        activation_layer = nn.SiLU

        # expand
        expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
        if expanded_channels != cnf.input_channels:
            layers.append(
                Conv2dNormActivation(
                    cnf.input_channels,
                    expanded_channels,
                    kernel_size=1,
                    norm_layer=norm_layer,
                    activation_layer=activation_layer,
                )
            )

        # depthwise (groups == channels)
        layers.append(
            Conv2dNormActivation(
                expanded_channels,
                expanded_channels,
                kernel_size=cnf.kernel,
                stride=cnf.stride,
                groups=expanded_channels,
                norm_layer=norm_layer,
                activation_layer=activation_layer,
            )
        )

        # squeeze and excitation; the bottleneck width is derived from the
        # block's input width, not from the expanded width
        squeeze_channels = max(1, cnf.input_channels // 4)
        layers.append(
            se_layer(
                expanded_channels,
                squeeze_channels,
                activation=partial(nn.SiLU, inplace=True),
            )
        )

        # project (no activation after the projection)
        layers.append(
            Conv2dNormActivation(
                expanded_channels,
                cnf.out_channels,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=None,
            )
        )

        self.block = nn.Sequential(*layers)
        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
        self.out_channels = cnf.out_channels

    def forward(self, input: Tensor) -> Tensor:
        """Run the block, adding the residual connection when it is enabled."""
        result = self.block(input)
        if self.use_res_connect:
            result = self.stochastic_depth(result)
            result += input
        return result
230
+
231
+
232
class FusedMBConv(nn.Module):
    """Fused inverted-residual block.

    When the expanded width differs from the input width the block is a
    spatial convolution (expansion) followed by a 1x1 projection without
    activation; otherwise it is a single spatial convolution with activation.
    A residual connection is used only for stride 1 with equal input/output
    widths.
    """

    def __init__(
        self,
        cnf: FusedMBConvConfig,
        stochastic_depth_prob: float,
        norm_layer: Callable[..., nn.Module],
    ) -> None:
        """Build the block from a stage configuration.

        Args:
            cnf: Supplies channels, kernel, stride and expand ratio.
            stochastic_depth_prob: Probability handed to ``StochasticDepth``
                (mode ``"row"``).
            norm_layer: Normalisation constructor for each convolution.

        Raises:
            ValueError: If ``cnf.stride`` is not 1 or 2.
        """
        super().__init__()

        if not (1 <= cnf.stride <= 2):
            raise ValueError("illegal stride value")

        same_resolution = cnf.stride == 1
        same_width = cnf.input_channels == cnf.out_channels
        self.use_res_connect = same_resolution and same_width

        act = nn.SiLU
        hidden = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)

        modules: List[nn.Module]
        if hidden == cnf.input_channels:
            # No expansion: one spatial convolution maps straight to the output width.
            modules = [
                Conv2dNormActivation(
                    cnf.input_channels,
                    cnf.out_channels,
                    kernel_size=cnf.kernel,
                    stride=cnf.stride,
                    norm_layer=norm_layer,
                    activation_layer=act,
                )
            ]
        else:
            # Fused expand, then a 1x1 projection with no activation.
            fused_expand = Conv2dNormActivation(
                cnf.input_channels,
                hidden,
                kernel_size=cnf.kernel,
                stride=cnf.stride,
                norm_layer=norm_layer,
                activation_layer=act,
            )
            project = Conv2dNormActivation(
                hidden,
                cnf.out_channels,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=None,
            )
            modules = [fused_expand, project]

        self.block = nn.Sequential(*modules)
        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
        self.out_channels = cnf.out_channels

    def forward(self, input: Tensor) -> Tensor:
        """Run the block, adding the residual connection when it is enabled."""
        out = self.block(input)
        if not self.use_res_connect:
            return out
        out = self.stochastic_depth(out)
        out += input
        return out
297
+
298
+
299
class EfficientNetConfig(PretrainedConfig):
    """Hugging Face configuration holding the hyper-parameters of ``EfficientNet``."""

    model_type = "efficientnet"

    def __init__(
        self,
        dropout: float = 0.25,
        num_channels: int = 61,
        stochastic_depth_prob: float = 0.2,
        num_classes: int = 2,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        size: str = 'v2_s',
        width_mult: float = 1.0,
        depth_mult: float = 1.0,
        **kwargs: Any,
    ) -> None:
        """Record the hyper-parameters, then initialise the base config.

        Args:
            dropout (float): The dropout probability.
            num_channels (int): Number of channels of the input.
            stochastic_depth_prob (float): The stochastic depth probability.
            num_classes (int): Number of classes.
            norm_layer (Optional[Callable[..., nn.Module]]): Accepted but not
                stored on the config.
            size (str): Architecture suffix (default ``'v2_s'``).
            width_mult (float): Width multiplier.
            depth_mult (float): Depth multiplier.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        stored_fields = (
            ("dropout", dropout),
            ("num_channels", num_channels),
            ("num_classes", num_classes),
            ("size", size),
            ("stochastic_depth_prob", stochastic_depth_prob),
            ("width_mult", width_mult),
            ("depth_mult", depth_mult),
        )
        for field_name, field_value in stored_fields:
            setattr(self, field_name, field_value)

        super().__init__(**kwargs)
350
+
351
+
352
class EfficientNetPreTrained(PreTrainedModel):
    """``PreTrainedModel`` wrapper that builds an ``EfficientNet`` from an ``EfficientNetConfig``."""

    config_class = EfficientNetConfig

    def __init__(self, config):
        """Create the wrapped network from the hyper-parameters stored on ``config``."""
        super().__init__(config)
        forwarded_fields = (
            "dropout",
            "num_channels",
            "num_classes",
            "size",
            "stochastic_depth_prob",
            "width_mult",
            "depth_mult",
        )
        hparams = {field: getattr(config, field) for field in forwarded_fields}
        self.model = EfficientNet(**hparams)

    def forward(self, tensor):
        """Return the wrapped network's output for ``tensor``."""
        return self.model.forward(tensor)
371
+
372
+
373
class EfficientNet(nn.Module):
    """EfficientNet V1 / V2 classifier: stem conv, MBConv stages, 1x1 head conv, pooled linear classifier."""

    def __init__(
        self,
        dropout: float = 0.25,
        num_channels: int = 61,
        stochastic_depth_prob: float = 0.2,
        num_classes: int = 2,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        size: str = 'v2_s',
        width_mult: float = 1.0,
        depth_mult: float = 1.0,
        **kwargs: Any,
    ) -> None:
        """Build the network for the architecture named ``"efficientnet_<size>"``.

        Args:
            dropout (float): Dropout probability of the classifier.
            num_channels (int): Number of channels of the input.
            stochastic_depth_prob (float): Maximum stochastic depth
                probability; each block gets a share proportional to its index.
            num_classes (int): Output size of the final linear layer.
            norm_layer (Optional[Callable[..., nn.Module]]): Normalisation
                constructor; ``nn.BatchNorm2d`` when ``None``.
            size (str): Architecture suffix looked up via ``_efficientnet_conf``.
            width_mult (float): Width multiplier passed to ``_efficientnet_conf``.
            depth_mult (float): Depth multiplier passed to ``_efficientnet_conf``.
            **kwargs: Only ``block`` is inspected (deprecated override of the
                block constructor on ``MBConvConfig`` entries).

        Raises:
            ValueError: If the architecture lookup yields no stages.
            TypeError: If the stages are not a sequence of ``_MBConvConfig``.
        """
        super().__init__()

        stage_configs, last_channel = _efficientnet_conf(
            "efficientnet_%s" % (size), width_mult=width_mult, depth_mult=depth_mult
        )

        if not stage_configs:
            raise ValueError("The inverted_residual_setting should not be empty")
        well_formed = isinstance(stage_configs, Sequence) and all(
            isinstance(entry, _MBConvConfig) for entry in stage_configs
        )
        if not well_formed:
            raise TypeError(
                "The inverted_residual_setting should be List[MBConvConfig]"
            )

        if "block" in kwargs:
            warnings.warn(
                "The parameter 'block' is deprecated since 0.13 and will be removed 0.15. "
                "Please pass this information on 'MBConvConfig.block' instead."
            )
            block_override = kwargs["block"]
            if block_override is not None:
                for entry in stage_configs:
                    if isinstance(entry, MBConvConfig):
                        entry.block = block_override

        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer

        # Stem: stride-2 3x3 convolution into the first stage's input width.
        stem = Conv2dNormActivation(
            num_channels,
            stage_configs[0].input_channels,
            kernel_size=3,
            stride=2,
            norm_layer=norm_layer,
            activation_layer=nn.SiLU,
        )
        layers: List[nn.Module] = [stem]

        # Stages of inverted-residual blocks.
        total_blocks = sum(cnf.num_layers for cnf in stage_configs)
        blocks_built = 0
        for cnf in stage_configs:
            stage_blocks: List[nn.Module] = []
            for position in range(cnf.num_layers):
                # Shallow copy so the per-block tweaks never touch the stage config.
                block_cnf = copy.copy(cnf)

                # Only the first block of a stage changes width / resolution.
                if position > 0:
                    block_cnf.input_channels = block_cnf.out_channels
                    block_cnf.stride = 1

                # Stochastic depth probability grows linearly with block index.
                sd_prob = stochastic_depth_prob * float(blocks_built) / total_blocks

                stage_blocks.append(block_cnf.block(block_cnf, sd_prob, norm_layer))
                blocks_built += 1

            layers.append(nn.Sequential(*stage_blocks))

        # Head: 1x1 convolution to the final feature width.
        head_in = stage_configs[-1].out_channels
        head_out = 4 * head_in if last_channel is None else last_channel
        layers.append(
            Conv2dNormActivation(
                head_in,
                head_out,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=nn.SiLU,
            )
        )

        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout, inplace=True),
            nn.Linear(head_out, num_classes),
        )

        # Weight initialisation, chosen per module type.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                bound = 1.0 / math.sqrt(module.out_features)
                nn.init.uniform_(module.weight, -bound, bound)
                nn.init.zeros_(module.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Features -> global average pool -> flatten -> classifier."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def forward(self, x: Tensor) -> Tensor:
        """Return the classifier output for ``x``."""
        return self._forward_impl(x)
519
+
520
+
521
+ # def _efficientnet(
522
+ # inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
523
+ # dropout: float,
524
+ # last_channel: Optional[int],
525
+ # weights=None,
526
+ # num_channels: int = 61,
527
+ # stochastic_depth_prob: float = 0.2,
528
+ # progress: bool = True,
529
+ # num_classes: int = 2,
530
+ # **kwargs: Any,
531
+ # ) -> EfficientNetCongig:
532
+
533
+ # model = EfficientNetCongif(
534
+ # inverted_residual_setting,
535
+ # dropout,
536
+ # num_classes=num_classes,
537
+ # num_channels=num_channels,
538
+ # stochastic_depth_prob=stochastic_depth_prob,
539
+ # last_channel=last_channel,
540
+ # **kwargs,
541
+ # )
542
+
543
+ # return model
544
+
545
+
546
def _efficientnet_conf(
    arch: str,
    **kwargs: Any,
) -> Tuple[Sequence[Union[MBConvConfig, FusedMBConvConfig]], Optional[int]]:
    """Return the stage configurations and head width for an architecture name.

    Args:
        arch: Architecture name; matched by prefix against
            ``efficientnet_b``, ``efficientnet_v2_s``, ``efficientnet_v2_m``
            and ``efficientnet_v2_l``.
        **kwargs: ``width_mult`` and ``depth_mult`` are required (and only
            read) for the ``efficientnet_b`` family.

    Returns:
        ``(stages, last_channel)``; ``last_channel`` is ``None`` for the
        ``efficientnet_b`` family and ``1280`` for the V2 variants.

    Raises:
        ValueError: If ``arch`` matches none of the known prefixes.
    """
    stages: List[Union[MBConvConfig, FusedMBConvConfig]]

    # Each row is (expand_ratio, kernel, stride, input_channels, out_channels, num_layers).
    if arch.startswith("efficientnet_b"):
        scaled_mbconv = partial(
            MBConvConfig,
            width_mult=kwargs.pop("width_mult"),
            depth_mult=kwargs.pop("depth_mult"),
        )
        b_rows = (
            (1, 3, 1, 32, 16, 1),
            (6, 3, 2, 16, 24, 2),
            (6, 5, 2, 24, 40, 2),
            (6, 3, 2, 40, 80, 3),
            (6, 5, 1, 80, 112, 3),
            (6, 5, 2, 112, 192, 4),
            (6, 3, 1, 192, 320, 1),
        )
        stages = [scaled_mbconv(*row) for row in b_rows]
        return stages, None

    # V2 variants: prefix -> (FusedMBConv rows, MBConv rows), in network order.
    v2_tables = {
        "efficientnet_v2_s": (
            (
                (1, 3, 1, 24, 24, 2),
                (4, 3, 2, 24, 48, 4),
                (4, 3, 2, 48, 64, 4),
            ),
            (
                (4, 3, 2, 64, 128, 6),
                (6, 3, 1, 128, 160, 9),
                (6, 3, 2, 160, 256, 15),
            ),
        ),
        "efficientnet_v2_m": (
            (
                (1, 3, 1, 24, 24, 3),
                (4, 3, 2, 24, 48, 5),
                (4, 3, 2, 48, 80, 5),
            ),
            (
                (4, 3, 2, 80, 160, 7),
                (6, 3, 1, 160, 176, 14),
                (6, 3, 2, 176, 304, 18),
                (6, 3, 1, 304, 512, 5),
            ),
        ),
        "efficientnet_v2_l": (
            (
                (1, 3, 1, 32, 32, 4),
                (4, 3, 2, 32, 64, 7),
                (4, 3, 2, 64, 96, 7),
            ),
            (
                (4, 3, 2, 96, 192, 10),
                (6, 3, 1, 192, 224, 19),
                (6, 3, 2, 224, 384, 25),
                (6, 3, 1, 384, 640, 7),
            ),
        ),
    }
    for prefix, (fused_rows, mbconv_rows) in v2_tables.items():
        if arch.startswith(prefix):
            stages = [FusedMBConvConfig(*row) for row in fused_rows]
            stages += [MBConvConfig(*row) for row in mbconv_rows]
            return stages, 1280

    raise ValueError(f"Unsupported model type {arch}")
603
+
604
+
605
+ #### extra torchvision stuff ####
606
+
607
+
608
class FrozenBatchNorm2d(torch.nn.Module):
    """
    BatchNorm2d whose statistics and affine parameters are fixed buffers.

    Args:
        num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
        eps (float): a value added to the denominator for numerical stability. Default: 1e-5
    """

    def __init__(
        self,
        num_features: int,
        eps: float = 1e-5,
    ):
        super().__init__()
        self.eps = eps
        # Registered as buffers (not parameters), in this fixed order.
        initial_buffers = (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones),
        )
        for buffer_name, make in initial_buffers:
            self.register_buffer(buffer_name, make(num_features))

    def _load_from_state_dict(
        self,
        state_dict: dict,
        prefix: str,
        local_metadata: dict,
        strict: bool,
        missing_keys: List[str],
        unexpected_keys: List[str],
        error_msgs: List[str],
    ):
        """Drop any ``num_batches_tracked`` entry for this module, then defer to the base loader."""
        state_dict.pop(prefix + "num_batches_tracked", None)

        super()._load_from_state_dict(
            state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
        )

    def forward(self, x: Tensor) -> Tensor:
        """Apply the fixed affine normalisation ``x * scale + shift``."""

        def per_channel(buffer: Tensor) -> Tensor:
            # Reshape up front so the arithmetic below broadcasts over (N, C, H, W).
            return buffer.reshape(1, -1, 1, 1)

        gamma = per_channel(self.weight)
        beta = per_channel(self.bias)
        variance = per_channel(self.running_var)
        mean = per_channel(self.running_mean)

        scale = gamma * (variance + self.eps).rsqrt()
        shift = beta - mean * scale
        return x * scale + shift

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.weight.shape[0]}, eps={self.eps})"
661
+
662
+
663
class ConvNormActivation(torch.nn.Sequential):
    """Sequential of convolution, optional normalisation and optional activation.

    Base class for ``Conv2dNormActivation`` and ``Conv3dNormActivation``;
    instantiating it directly emits a warning.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: Optional[int] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: int = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
        conv_layer: Callable[..., torch.nn.Module] = torch.nn.Conv2d,
    ) -> None:
        """Assemble the layers.

        ``padding`` defaults to ``(kernel_size - 1) // 2 * dilation`` and
        ``bias`` defaults to ``True`` only when no ``norm_layer`` is given.
        ``inplace`` is forwarded to the activation unless it is ``None``.
        """
        uses_norm = norm_layer is not None
        if padding is None:
            padding = (kernel_size - 1) // 2 * dilation
        if bias is None:
            bias = not uses_norm

        convolution = conv_layer(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        stack = [convolution]

        if uses_norm:
            stack.append(norm_layer(out_channels))

        if activation_layer is not None:
            activation_kwargs = {"inplace": inplace} if inplace is not None else {}
            stack.append(activation_layer(**activation_kwargs))

        super().__init__(*stack)
        self.out_channels = out_channels

        if type(self) is ConvNormActivation:
            warnings.warn(
                "Don't use ConvNormActivation directly, please use Conv2dNormActivation and Conv3dNormActivation instead."
            )
712
+
713
+
714
class Conv2dNormActivation(ConvNormActivation):
    """
    Configurable Convolution2d-Normalization-Activation block.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size: (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.

    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: Optional[int] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: int = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        # Same arguments as the base class, with the convolution type pinned to Conv2d.
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            norm_layer=norm_layer,
            activation_layer=activation_layer,
            dilation=dilation,
            inplace=inplace,
            bias=bias,
            conv_layer=torch.nn.Conv2d,
        )
762
+
763
+
764
class Conv3dNormActivation(ConvNormActivation):
    """
    Configurable Convolution3d-Normalization-Activation block.

    Args:
        in_channels (int): Number of channels in the input video.
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size: (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm3d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: Optional[int] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm3d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: int = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        # Same arguments as the base class, with the convolution type pinned to Conv3d.
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            norm_layer=norm_layer,
            activation_layer=activation_layer,
            dilation=dilation,
            inplace=inplace,
            bias=bias,
            conv_layer=torch.nn.Conv3d,
        )
811
+
812
+
813
class SqueezeExcitation(torch.nn.Module):
    """
    Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
    ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.

    Args:
        input_channels (int): Number of channels in the input image
        squeeze_channels (int): Number of squeeze channels
        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
    """

    def __init__(
        self,
        input_channels: int,
        squeeze_channels: int,
        activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
        scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
    ) -> None:
        super().__init__()
        conv1x1 = torch.nn.Conv2d
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        # Two 1x1 convolutions: down to the squeeze width and back up again.
        self.fc1 = conv1x1(input_channels, squeeze_channels, 1)
        self.fc2 = conv1x1(squeeze_channels, input_channels, 1)
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: Tensor) -> Tensor:
        """Compute the gate: pool -> fc1 -> activation -> fc2 -> scale_activation."""
        squeezed = self.activation(self.fc1(self.avgpool(input)))
        return self.scale_activation(self.fc2(squeezed))

    def forward(self, input: Tensor) -> Tensor:
        """Return ``input`` multiplied by its gate."""
        return self._scale(input) * input
850
+
851
+
852
class MLP(torch.nn.Sequential):
    """This block implements the multi-layer perceptron (MLP) module.

    Every entry of ``hidden_channels`` except the last produces
    ``Linear -> [norm] -> [activation] -> Dropout``; the last entry produces
    ``Linear -> Dropout``.

    Args:
        in_channels (int): Number of channels of the input
        hidden_channels (List[int]): List of the hidden channel dimensions
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of the linear layer. If ``None`` this layer won't be used. Default: ``None``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation stacked on top of the normalization layer (if not None), otherwise on top of the linear layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
        inplace (bool): Forwarded to the activation and dropout layers unless ``None``. Default ``True``
        bias (bool): Whether to use bias in the linear layer. Default ``True``
        dropout (float): The probability for the dropout layer. Default: 0.0
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: List[int],
        norm_layer: Optional[Callable[..., torch.nn.Module]] = None,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        inplace: Optional[bool] = True,
        bias: bool = True,
        dropout: float = 0.0,
    ):
        # The addition of `norm_layer` is inspired from the implementation of TorchMultimodal:
        # https://github.com/facebookresearch/multimodal/blob/5dec8a/torchmultimodal/modules/layers/mlp.py
        params = {} if inplace is None else {"inplace": inplace}

        layers = []
        in_dim = in_channels
        for hidden_dim in hidden_channels[:-1]:
            layers.append(torch.nn.Linear(in_dim, hidden_dim, bias=bias))
            if norm_layer is not None:
                layers.append(norm_layer(hidden_dim))
            # Honour the documented contract: ``None`` means "no activation".
            # Calling it unconditionally raised TypeError for ``None``.
            if activation_layer is not None:
                layers.append(activation_layer(**params))
            layers.append(torch.nn.Dropout(dropout, **params))
            in_dim = hidden_dim

        # Output layer: no normalisation or activation, only dropout.
        layers.append(torch.nn.Linear(in_dim, hidden_channels[-1], bias=bias))
        layers.append(torch.nn.Dropout(dropout, **params))

        super().__init__(*layers)
894
+
895
+
896
class Permute(torch.nn.Module):
    """Module wrapper around ``torch.permute``.

    Args:
        dims (List[int]): The desired ordering of dimensions
    """

    def __init__(self, dims: List[int]):
        super().__init__()
        self.dims = dims

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` with its dimensions reordered according to ``self.dims``."""
        return x.permute(self.dims)
909
+
910
+
911
+
912
+
913
+
914
  def normalize_array(x: list):
915
 
916
  '''Makes array between 0 and 1'''
requirements.txt CHANGED
@@ -4,5 +4,4 @@ matplotlib
4
  scipy
5
  Pillow
6
  transformers
7
- torchvision
8
- timm
 
4
  scipy
5
  Pillow
6
  transformers
7
+ torchvision