π Merge branch 'MODELv2' into TEST
Browse files- yolo/model/module.py +9 -324
yolo/model/module.py
CHANGED
@@ -47,6 +47,15 @@ class Pool(nn.Module):
|
|
47 |
return self.pool(x)
|
48 |
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
# ----------- Detection Class ----------- #
|
51 |
class Detection(nn.Module):
|
52 |
"""A single YOLO Detection head for detection models"""
|
@@ -351,327 +360,3 @@ class CBFuse(nn.Module):
|
|
351 |
res = [F.interpolate(x[pick_id], size=target_size, mode=self.mode) for pick_id, x in zip(self.idx, x_list)]
|
352 |
out = torch.stack(res + [target]).sum(dim=0)
|
353 |
return out
|
354 |
-
|
355 |
-
|
356 |
-
############# Waiting For Refactor #############
|
357 |
-
# ResNet
|
358 |
-
class Res(nn.Module):
    """ResNet-style bottleneck with an identity shortcut.

    Three ``Conv`` layers (kernel arguments 1, 3, 1) are chained and the block
    input is added to their result.  ``Conv`` is defined elsewhere in this
    file; its positional arguments are presumably
    (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count handed to the first ``Conv``.
        out_channels: channel count produced by the last ``Conv``; the sum in
            ``forward`` needs it to be compatible with the input.
        groups: forwarded to the middle ``Conv`` only.
        act: activation module forwarded to every ``Conv``.
        ratio: hidden width is ``int(in_channels * ratio)``.
    """

    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.25):
        super().__init__()

        hidden = int(in_channels * ratio)
        self.cv1 = Conv(in_channels, hidden, 1, 1, act=act)
        self.cv2 = Conv(hidden, hidden, 3, 1, groups=groups, act=act)
        self.cv3 = Conv(hidden, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the output of the three-layer branch."""
        branch = self.cv1(x)
        branch = self.cv2(branch)
        branch = self.cv3(branch)
        return x + branch
|
371 |
-
|
372 |
-
|
373 |
-
class RepRes(nn.Module):
    """ResNet-style bottleneck whose middle layer is a ``RepConv``.

    Layer order is ``Conv`` (kernel argument 1) -> ``RepConv`` (kernel
    argument 3) -> ``Conv`` (kernel argument 1), followed by an identity
    shortcut.  ``Conv`` and ``RepConv`` are defined elsewhere in this file;
    their positional arguments are presumably
    (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count handed to the first layer.
        out_channels: channel count produced by the last layer; the sum in
            ``forward`` needs it to be compatible with the input.
        groups: forwarded to the ``RepConv`` only.
        act: activation module forwarded to every layer.
        ratio: hidden width is ``int(in_channels * ratio)``.
    """

    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.25):
        super().__init__()

        hidden = int(in_channels * ratio)
        self.cv1 = Conv(in_channels, hidden, 1, 1, act=act)
        self.cv2 = RepConv(hidden, hidden, 3, 1, groups=groups, act=act)
        self.cv3 = Conv(hidden, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the output of the three-layer branch."""
        branch = self.cv1(x)
        branch = self.cv2(branch)
        branch = self.cv3(branch)
        return x + branch
|
386 |
-
|
387 |
-
|
388 |
-
class ConvBlock(nn.Module):
    """Sequential stack of ``Conv`` layers (kernel argument 3).

    Layout, with ``h = int(in_channels * ratio)``:

    * ``repeat == 1``: a single ``Conv`` ``in_channels -> in_channels``.
    * otherwise: ``Conv`` ``in_channels -> h``, then ``repeat - 2`` layers
      ``h -> h`` (none when ``repeat <= 2``), then ``Conv`` ``h -> in_channels``.

    ``Conv`` is defined elsewhere in this file; its positional arguments are
    presumably (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count of the input and of the output.
        repeat: controls the number of layers as described above.
        act: activation module forwarded to every ``Conv``.
        ratio: scales the hidden width ``h``.
    """

    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
        super().__init__()

        h_channels = int(in_channels * ratio)
        self.cv1 = (
            Conv(in_channels, in_channels, 3, 1, act=act)
            if repeat == 1
            else Conv(in_channels, h_channels, 3, 1, act=act)
        )
        # The middle layers sit between cv1 (emits h_channels) and cv2
        # (expects h_channels), so they must run at the hidden width.  Using
        # in_channels here only lined up when ratio == 1.0.
        self.cb = (
            nn.Sequential(*(Conv(h_channels, h_channels, 3, 1, act=act) for _ in range(repeat - 2)))
            if repeat > 2
            else nn.Identity()
        )
        self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)

    def forward(self, x):
        """Apply ``cv1``, the middle stack and ``cv2`` in order."""
        return self.cv2(self.cb(self.cv1(x)))
|
409 |
-
|
410 |
-
|
411 |
-
class RepConvBlock(nn.Module):
    """Sequential stack built mostly from ``RepConv`` layers (kernel argument 3).

    Layout, with ``h = int(in_channels * ratio)``:

    * ``repeat == 1``: a single plain ``Conv`` ``in_channels -> in_channels``.
    * otherwise: ``RepConv`` ``in_channels -> h``, then ``repeat - 2``
      ``RepConv`` layers ``h -> h`` (none when ``repeat <= 2``), then a plain
      ``Conv`` ``h -> in_channels``.

    ``Conv`` and ``RepConv`` are defined elsewhere in this file; their
    positional arguments are presumably
    (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count of the input and of the output.
        repeat: controls the number of layers as described above.
        act: activation module forwarded to every layer.
        ratio: scales the hidden width ``h``.
    """

    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
        super().__init__()

        h_channels = int(in_channels * ratio)
        self.cv1 = (
            Conv(in_channels, in_channels, 3, 1, act=act)
            if repeat == 1
            else RepConv(in_channels, h_channels, 3, 1, act=act)
        )
        # The middle layers sit between cv1 (emits h_channels) and cv2
        # (expects h_channels), so they must run at the hidden width.  Using
        # in_channels here only lined up when ratio == 1.0.
        self.cb = (
            nn.Sequential(*(RepConv(h_channels, h_channels, 3, 1, act=act) for _ in range(repeat - 2)))
            if repeat > 2
            else nn.Identity()
        )
        self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)

    def forward(self, x):
        """Apply ``cv1``, the middle stack and ``cv2`` in order."""
        return self.cv2(self.cb(self.cv1(x)))
|
432 |
-
|
433 |
-
|
434 |
-
class ResConvBlock(nn.Module):
    """Stack of ``Conv`` layers (kernel argument 3) wrapped in an identity shortcut.

    Layout of the branch, with ``h = int(in_channels * ratio)``:

    * ``repeat == 1``: a single ``Conv`` ``in_channels -> in_channels``.
    * otherwise: ``Conv`` ``in_channels -> h``, then ``repeat - 2`` layers
      ``h -> h`` (none when ``repeat <= 2``), then ``Conv`` ``h -> in_channels``.

    ``forward`` returns the input plus the branch output.  ``Conv`` is defined
    elsewhere in this file; its positional arguments are presumably
    (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count of the input and of the output.
        repeat: controls the number of layers as described above.
        act: activation module forwarded to every ``Conv``.
        ratio: scales the hidden width ``h``.
    """

    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
        super().__init__()

        h_channels = int(in_channels * ratio)
        self.cv1 = (
            Conv(in_channels, in_channels, 3, 1, act=act)
            if repeat == 1
            else Conv(in_channels, h_channels, 3, 1, act=act)
        )
        # The middle layers sit between cv1 (emits h_channels) and cv2
        # (expects h_channels), so they must run at the hidden width.  Using
        # in_channels here only lined up when ratio == 1.0.
        self.cb = (
            nn.Sequential(*(Conv(h_channels, h_channels, 3, 1, act=act) for _ in range(repeat - 2)))
            if repeat > 2
            else nn.Identity()
        )
        self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the output of ``cv1 -> cb -> cv2``."""
        return x + self.cv2(self.cb(self.cv1(x)))
|
455 |
-
|
456 |
-
|
457 |
-
class ResRepConvBlock(nn.Module):
    """Stack built mostly from ``RepConv`` layers, wrapped in an identity shortcut.

    Layout of the branch, with ``h = int(in_channels * ratio)``:

    * ``repeat == 1``: a single plain ``Conv`` ``in_channels -> in_channels``.
    * otherwise: ``RepConv`` ``in_channels -> h``, then ``repeat - 2``
      ``RepConv`` layers ``h -> h`` (none when ``repeat <= 2``), then a plain
      ``Conv`` ``h -> in_channels``.

    ``forward`` returns the input plus the branch output.  ``Conv`` and
    ``RepConv`` are defined elsewhere in this file; their positional arguments
    are presumably (in_channels, out_channels, kernel_size, stride) -- TODO
    confirm.

    Args:
        in_channels: channel count of the input and of the output.
        repeat: controls the number of layers as described above.
        act: activation module forwarded to every layer.
        ratio: scales the hidden width ``h``.
    """

    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
        super().__init__()

        h_channels = int(in_channels * ratio)
        self.cv1 = (
            Conv(in_channels, in_channels, 3, 1, act=act)
            if repeat == 1
            else RepConv(in_channels, h_channels, 3, 1, act=act)
        )
        # The middle layers sit between cv1 (emits h_channels) and cv2
        # (expects h_channels), so they must run at the hidden width.  Using
        # in_channels here only lined up when ratio == 1.0.
        self.cb = (
            nn.Sequential(*(RepConv(h_channels, h_channels, 3, 1, act=act) for _ in range(repeat - 2)))
            if repeat > 2
            else nn.Identity()
        )
        self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the output of ``cv1 -> cb -> cv2``."""
        return x + self.cv2(self.cb(self.cv1(x)))
|
478 |
-
|
479 |
-
|
480 |
-
# Darknet
|
481 |
-
class Dark(nn.Module):
    """DarkNet-style bottleneck with an identity shortcut.

    Two ``Conv`` layers (kernel arguments 1 then 3) are chained and the block
    input is added to their result.  ``Conv`` is defined elsewhere in this
    file; its positional arguments are presumably
    (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count handed to the first ``Conv``.
        out_channels: channel count produced by the second ``Conv``; the sum
            in ``forward`` needs it to be compatible with the input.
        groups: forwarded to the second ``Conv`` only.
        act: activation module forwarded to both layers.
        ratio: hidden width is ``int(in_channels * ratio)``.
    """

    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.5):
        super().__init__()

        hidden = int(in_channels * ratio)
        self.cv1 = Conv(in_channels, hidden, 1, 1, act=act)
        self.cv2 = Conv(hidden, out_channels, 3, 1, groups=groups, act=act)

    def forward(self, x):
        """Return ``x`` plus the output of the two-layer branch."""
        branch = self.cv1(x)
        branch = self.cv2(branch)
        return x + branch
|
493 |
-
|
494 |
-
|
495 |
-
class RepDark(nn.Module):
    """DarkNet-style bottleneck that opens with a ``RepConv``.

    Layer order is ``RepConv`` (kernel argument 3) -> ``Conv`` (kernel
    argument 1), followed by an identity shortcut.  ``Conv`` and ``RepConv``
    are defined elsewhere in this file; their positional arguments are
    presumably (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count handed to the ``RepConv``.
        out_channels: channel count produced by the ``Conv``; the sum in
            ``forward`` needs it to be compatible with the input.
        groups: forwarded to the ``RepConv`` only.
        act: activation module forwarded to both layers.
        ratio: hidden width is ``int(in_channels * ratio)``.
    """

    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.5):
        super().__init__()

        hidden = int(in_channels * ratio)
        self.cv1 = RepConv(in_channels, hidden, 3, 1, groups=groups, act=act)
        self.cv2 = Conv(hidden, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the output of the two-layer branch."""
        branch = self.cv1(x)
        branch = self.cv2(branch)
        return x + branch
|
507 |
-
|
508 |
-
|
509 |
-
# CSPNet
|
510 |
-
class CSP(nn.Module):
    """Cross-stage-partial block built from ``ResConvBlock`` modules.

    ``cv1`` projects the input, the result is split into two halves along
    dim 1, the first half runs through ``repeat`` ``ResConvBlock`` modules,
    and both halves are concatenated again before ``cv2``.

    ``Conv`` and ``ResConvBlock`` are defined elsewhere in this file; the
    positional arguments of ``Conv`` are presumably
    (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count produced by ``cv2``.
        repeat: number of ``ResConvBlock`` modules on the processed half.
        cb_repeat: ``repeat`` value handed to each ``ResConvBlock``.
        act: activation module forwarded to every layer.
    """

    def __init__(self, in_channels, out_channels, repeat=1, cb_repeat=2, act=nn.ReLU()):
        super().__init__()
        h_channels = in_channels // 2
        # Emit exactly 2 * h_channels so chunk(2, 1) yields two equal halves
        # even for odd in_channels (same width as before when it is even).
        self.cv1 = Conv(in_channels, 2 * h_channels, 1, 1, act=act)
        self.cb = nn.Sequential(*(ResConvBlock(h_channels, act=act, repeat=cb_repeat) for _ in range(repeat)))
        self.cv2 = Conv(2 * h_channels, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Split after ``cv1``, process the first half, re-join and apply ``cv2``."""
        processed, passthrough = self.cv1(x).chunk(2, 1)
        merged = torch.cat((self.cb(processed), passthrough), 1)
        return self.cv2(merged)
|
524 |
-
|
525 |
-
|
526 |
-
class CSPDark(nn.Module):
    """Cross-stage-partial block built from ``Dark`` bottlenecks.

    ``cv1`` projects the input, the result is split into two halves along
    dim 1, the first half runs through ``repeat`` ``Dark`` modules, and both
    halves are concatenated again before ``cv2``.

    ``Conv`` and ``Dark`` are defined elsewhere in this file; the positional
    arguments of ``Conv`` are presumably
    (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count produced by ``cv2``.
        repeat: number of ``Dark`` modules on the processed half.
        groups: forwarded to each ``Dark``.
        act: activation module forwarded to every layer.
        ratio: forwarded to each ``Dark``.
    """

    def __init__(self, in_channels, out_channels, repeat=1, groups=1, act=nn.ReLU(), ratio=1.0):
        super().__init__()

        h_channels = in_channels // 2
        # Emit exactly 2 * h_channels so chunk(2, 1) yields two equal halves
        # even for odd in_channels (same width as before when it is even).
        self.cv1 = Conv(in_channels, 2 * h_channels, 1, 1, act=act)
        self.cb = nn.Sequential(
            *(Dark(h_channels, h_channels, groups=groups, act=act, ratio=ratio) for _ in range(repeat))
        )
        self.cv2 = Conv(2 * h_channels, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Split after ``cv1``, process the first half, re-join and apply ``cv2``."""
        processed, passthrough = self.cv1(x).chunk(2, 1)
        return self.cv2(torch.cat((self.cb(processed), passthrough), 1))
|
544 |
-
|
545 |
-
|
546 |
-
class CSPELAN(nn.Module):
    """ELAN-style aggregation whose inner stages are ``CSP`` modules.

    ``cv1`` projects the input to two halves of ``med_channels // 2`` channels
    each.  Every ``CSP`` stage consumes the most recent tensor in the running
    list and appends its own output; finally all ``2 + elan_repeat`` tensors
    are concatenated along dim 1 and fused by ``cv2``.

    ``Conv`` and ``CSP`` are defined elsewhere in this file; the positional
    arguments of ``Conv`` are presumably
    (in_channels, out_channels, kernel_size, stride) -- TODO confirm.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count produced by ``cv2``.
        med_channels: width after ``cv1`` (rounded down to an even number).
        elan_repeat: number of ``CSP`` stages.
        cb_repeat: handed to each ``CSP`` as its ``repeat`` argument.
        ratio: accepted for interface compatibility but not used; ``CSP`` has
            no ``ratio`` parameter, and forwarding it raised ``TypeError``.
            NOTE(review): confirm whether ``CSPDark`` (which does take
            ``ratio``) was the intended inner block.
    """

    def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):
        super().__init__()

        h_channels = med_channels // 2
        # Emit exactly 2 * h_channels so chunk(2, 1) yields two equal halves
        # even for odd med_channels (same width as before when it is even).
        self.cv1 = Conv(in_channels, 2 * h_channels, 1, 1)
        self.cb = nn.ModuleList(CSP(h_channels, h_channels, repeat=cb_repeat) for _ in range(elan_repeat))
        self.cv2 = Conv((2 + elan_repeat) * h_channels, out_channels, 1, 1)

    def forward(self, x):
        """Run the chained stages and fuse every intermediate tensor."""
        y = list(self.cv1(x).chunk(2, 1))
        # Each stage reads the tensor appended by the previous iteration.
        for stage in self.cb:
            y.append(stage(y[-1]))
        return self.cv2(torch.cat(y, 1))
|
563 |
-
|
564 |
-
|
565 |
-
class Concat(nn.Module):
    """Module wrapper around ``torch.cat``.

    Args:
        dim: dimension along which the incoming tensors are joined.
    """

    def __init__(self, dim=1):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        """Concatenate the sequence of tensors ``x`` along ``self.dim``."""
        joined = torch.cat(x, self.dim)
        return joined
|
572 |
-
|
573 |
-
|
574 |
-
# TODO: check if Mit
|
575 |
-
class SPPCSPConv(nn.Module):
    """Spatial-pyramid-pooling block with a CSP-style side branch.

    See https://github.com/WongKinYiu/CrossStagePartialNetworks

    Main branch: ``cv1 -> cv3 -> cv4``, then the result and one pooled copy
    per entry of ``k`` are concatenated along dim 1 and passed through
    ``cv5 -> cv6``.  Side branch: ``cv2`` on the raw input.  Both branches are
    concatenated along dim 1 and fused by ``cv7``.

    ``Conv`` and ``Pool`` are defined elsewhere in this file.  ``cv5`` expects
    ``4 * hidden`` channels, which matches the default of three pooling sizes.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count produced by ``cv7``.
        n: unused in this block.
        shortcut: unused in this block.
        g: unused in this block.
        e: hidden width is ``int(2 * out_channels * e)``.
        k: kernel sizes of the pooling layers; padding is ``size // 2``.
    """

    def __init__(self, in_channels, out_channels, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
        super().__init__()
        hidden = int(2 * out_channels * e)
        self.cv1 = Conv(in_channels, hidden, 1)
        self.cv2 = Conv(in_channels, hidden, 1)
        self.cv3 = Conv(hidden, hidden, 3)
        self.cv4 = Conv(hidden, hidden, 1)
        self.m = nn.ModuleList(
            [Pool(method="max", kernel_size=size, stride=1, padding=size // 2) for size in k]
        )
        self.cv5 = Conv(4 * hidden, hidden, 1)
        self.cv6 = Conv(hidden, hidden, 3)
        self.cv7 = Conv(2 * hidden, out_channels, 1)

    def forward(self, x):
        """Fuse the pooled main branch with the side branch."""
        main = self.cv4(self.cv3(self.cv1(x)))
        pyramid = [main]
        for pool in self.m:
            pyramid.append(pool(main))
        main = self.cv6(self.cv5(torch.cat(pyramid, 1)))
        side = self.cv2(x)
        return self.cv7(torch.cat((main, side), dim=1))
|
594 |
-
|
595 |
-
|
596 |
-
class ImplicitA(nn.Module):
    """
    YOLOR implicit knowledge, additive form. Paper: https://arxiv.org/abs/2105.04206

    Holds a learnable tensor of shape (1, channel, 1, 1), drawn from a normal
    distribution with the given ``mean`` and ``std``, and adds it to the input.
    """

    def __init__(self, channel: int, mean: float = 0.0, std: float = 0.02):
        super().__init__()
        self.channel, self.mean, self.std = channel, mean, std

        initial = torch.empty(1, channel, 1, 1)
        self.implicit = nn.Parameter(initial)
        nn.init.normal_(self.implicit, mean=self.mean, std=self.std)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` shifted by the learned tensor (broadcast over dim 1)."""
        shifted = self.implicit + x
        return shifted
|
612 |
-
|
613 |
-
|
614 |
-
class ImplicitM(nn.Module):
    """
    YOLOR implicit knowledge, multiplicative form. Paper: https://arxiv.org/abs/2105.04206

    Holds a learnable tensor of shape (1, channel, 1, 1), drawn from a normal
    distribution with the given ``mean`` and ``std``, and multiplies the input
    by it.
    """

    def __init__(self, channel: int, mean: float = 1.0, std: float = 0.02):
        super().__init__()
        self.channel, self.mean, self.std = channel, mean, std

        initial = torch.empty(1, channel, 1, 1)
        self.implicit = nn.Parameter(initial)
        nn.init.normal_(self.implicit, mean=self.mean, std=self.std)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` scaled by the learned tensor (broadcast over dim 1)."""
        scaled = self.implicit * x
        return scaled
|
630 |
-
|
631 |
-
|
632 |
-
class IDetect(nn.Module):
    """Anchor-based detection head with YOLOR implicit layers.

    Each input level passes through ``ImplicitA`` -> 1x1 ``nn.Conv2d`` ->
    ``ImplicitM`` and is reshaped to (bs, na, ny, nx, no).  In training (or
    export) mode the reshaped tensors are returned as-is; otherwise the boxes
    are additionally decoded with the cell grid, ``stride`` and the anchors.

    ``ImplicitA`` and ``ImplicitM`` are defined elsewhere in this file.

    #TODO: Add Detect class, change IDetect base class

    Args:
        nc: number of classes.
        anchors: one flat sequence of (w, h) pairs per detection level; must
            be non-empty because ``anchors[0]`` is indexed.
        ch: input channel count of each detection level.
    """

    stride = None  # strides computed during build; must be set before inference
    export = False  # onnx export
    end2end = False
    include_nms = False
    concat = False

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # placeholder grids, rebuilt lazily in forward
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer("anchors", a)  # shape(nl,na,2)
        self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

        self.ia = nn.ModuleList(ImplicitA(x) for x in ch)
        self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a float tensor of shape (1, 1, ny, nx, 2) holding each cell's (x, y) index."""
        ys = torch.arange(ny).view(ny, 1).expand(ny, nx)
        xs = torch.arange(nx).view(1, nx).expand(ny, nx)
        return torch.stack((xs, ys), 2).view(1, 1, ny, nx, 2).float()

    def forward(self, x):
        """Run the head on the list of feature maps ``x``.

        ``x`` is modified in place: every entry is replaced by its reshaped
        prediction tensor.

        Returns:
            ``x`` in training/export mode, otherwise the tuple
            ``(decoded, x)`` where ``decoded`` concatenates the decoded
            predictions of all levels along dim 1.
        """
        # x = x.copy()  # for profiling
        z = []  # inference output
        # Export reuses the training path (raw outputs, no decoding).  Note
        # that this writes to the module's ``training`` flag.
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](self.ia[i](x[i]))  # conv
            x[i] = self.im[i](x[i])
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                # Rebuild the cached grid whenever the feature-map size changes.
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)
|
|
|
47 |
return self.pool(x)
|
48 |
|
49 |
|
50 |
+
class Concat(nn.Module):
    """Join a sequence of tensors along a fixed dimension.

    Args:
        dim: dimension passed to ``torch.cat``.
    """

    def __init__(self, dim=1):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        """Return ``torch.cat`` of the tensors in ``x`` along ``self.dim``."""
        axis = self.dim
        return torch.cat(x, axis)
|
57 |
+
|
58 |
+
|
59 |
# ----------- Detection Class ----------- #
|
60 |
class Detection(nn.Module):
|
61 |
"""A single YOLO Detection head for detection models"""
|
|
|
360 |
res = [F.interpolate(x[pick_id], size=target_size, mode=self.mode) for pick_id, x in zip(self.idx, x_list)]
|
361 |
out = torch.stack(res + [target]).sum(dim=0)
|
362 |
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|