✨ [Add] New model, anchor2box move into model

- yolo/config/model/v9-c.yaml +21 -2
- yolo/model/module.py +39 -6
- yolo/model/yolo.py +1 -1
- yolo/tools/format_converters.py +4 -2
- yolo/tools/loss_functions.py +11 -15
- yolo/tools/solver.py +11 -22
- yolo/utils/bounding_box_utils.py +6 -43

yolo/config/model/v9-c.yaml

@@ -121,5 +121,24 @@ model:
         tags: A5

     - MultiheadDetection:
-        source: [A3, A4, A5]
-
+        source: [A3, A4, A5]
+        tags: aux_head
+    - Anchor2Box:
+        source: aux_head
+        output: True
+        args:
+          reg_max: ${model.anchor.reg_max}
+          strides: ${model.anchor.strides}
+        tags: aux_bbox
+
+  detection:
+    - MultiheadDetection:
+        source: [P3, P4, P5]
+        tags: reg_head
+    - Anchor2Box:
+        source: reg_head
+        output: True
+        args:
+          reg_max: ${model.anchor.reg_max}
+          strides: ${model.anchor.strides}
+        tags: reg_bbox
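
For orientation, the tensor sizes this config implies can be checked with a few lines of arithmetic. This is not part of the commit; it assumes reg_max=16, class_num=80 and a 640x640 input, which match the values hard-coded in the new Anchor2Box module below, plus strides of [8, 16, 32], which the config only references as ${model.anchor.strides}.

# Back-of-envelope check of the sizes the new detection/auxiliary branches produce (illustrative only).
reg_max, class_num = 16, 80
image_size, strides = 640, [8, 16, 32]             # strides are an assumption, not stated in this diff

head_channels = 4 * reg_max + class_num            # 144 channels per grid cell from each MultiheadDetection scale
cells = [(image_size // s) ** 2 for s in strides]  # [6400, 1600, 400]
print(head_channels, sum(cells))                   # 144 8400 -> the "AC" and "HW" in Anchor2Box's docstring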

yolo/model/module.py

@@ -2,10 +2,12 @@ from typing import Any, Dict, List, Optional, Tuple

 import torch
 import torch.nn.functional as F
+from einops import rearrange
 from loguru import logger
 from torch import Tensor, nn
 from torch.nn.common_types import _size_2_t

+from yolo.utils.bounding_box_utils import generate_anchors
 from yolo.utils.module_utils import auto_pad, create_activation_function, round_up


@@ -56,7 +58,6 @@ class Detection(nn.Module):
         anchor_channels = 4 * reg_max

         first_neck, in_channels = in_channels
-        # TODO: round up head[0] channels or each head?
         anchor_neck = max(round_up(first_neck // 4, groups), anchor_channels, 16)
         class_neck = max(first_neck, min(num_classes * 2, 128))

@@ -83,18 +84,50 @@ class MultiheadDetection(nn.Module):

     def __init__(self, in_channels: List[int], num_classes: int, **head_kwargs):
         super().__init__()
-        # TODO: Refactor these parts
         self.heads = nn.ModuleList(
-            [
-                Detection((in_channels[3 * (idx // 3)], in_channel), num_classes, **head_kwargs)
-                for idx, in_channel in enumerate(in_channels)
-            ]
+            [Detection((in_channels[0], in_channel), num_classes, **head_kwargs) for in_channel in in_channels]
         )

     def forward(self, x_list: List[torch.Tensor]) -> List[torch.Tensor]:
         return [head(x) for x, head in zip(x_list, self.heads)]


+class Anchor2Box(nn.Module):
+    def __init__(self, reg_max, strides) -> None:
+        super().__init__()
+        self.reg_max = reg_max
+        self.strides = strides
+        # TODO: read by cfg!
+        image_size = [640, 640]
+        self.class_num = 80
+        self.anchors, self.scaler = generate_anchors(image_size, self.strides)
+        reverse_reg = torch.arange(self.reg_max, dtype=torch.float32)
+        self.reverse_reg = nn.Parameter(reverse_reg, requires_grad=False)
+        self.anchors = nn.Parameter(self.anchors, requires_grad=False)
+        self.scaler = nn.Parameter(self.scaler, requires_grad=False)
+
+    def forward(self, predicts: List[Tensor]) -> Tensor:
+        """
+        args:
+            [B x AnchorClass x h1 x w1, B x AnchorClass x h2 x w2, B x AnchorClass x h3 x w3] // AnchorClass = 4 * 16 + 80
+        return:
+            [B x HW x ClassBbox] // HW = h1*w1 + h2*w2 + h3*w3, ClassBox = 80 + 4 (xyXY)
+        """
+        preds = []
+        for pred in predicts:
+            preds.append(rearrange(pred, "B AC h w -> B (h w) AC"))  # B x AC x h x w -> B x hw x AC
+        preds = torch.concat(preds, dim=1)  # -> B x (H W) x AC
+        preds_anc, preds_cls = torch.split(preds, (self.reg_max * 4, self.class_num), dim=-1)
+        preds_anc = rearrange(preds_anc, "B hw (P R) -> B hw P R", P=4)
+
+        pred_LTRB = preds_anc.softmax(dim=-1) @ self.reverse_reg * self.scaler.view(1, -1, 1)
+
+        lt, rb = pred_LTRB.chunk(2, dim=-1)
+        preds_box = torch.cat([self.anchors - lt, self.anchors + rb], dim=-1)
+        predicts = torch.cat([preds_cls, preds_box], dim=-1)
+        return predicts, preds_anc
+
+
 # ----------- Backbone Class ----------- #
 class RepConv(nn.Module):
     """A convolutional block that combines two convolution layers (kernel and point-wise)."""
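
To see what Anchor2Box emits, here is a minimal standalone sketch of the same decode on dummy tensors. It is not repository code: the anchor generation is inlined rather than imported, and it assumes the hard-coded defaults above (reg_max=16, 80 classes, 640x640 input) plus strides of [8, 16, 32], which this diff does not state.

# Minimal standalone sketch of the Anchor2Box decode on dummy tensors.
import torch
from einops import rearrange

reg_max, class_num, strides, size = 16, 80, [8, 16, 32], 640

# anchor centres and per-anchor stride, built the same way as generate_anchors
anchors, scaler = [], []
for stride in strides:
    shift = stride // 2
    x = torch.arange(0, size, stride) + shift
    y = torch.arange(0, size, stride) + shift
    ax, ay = torch.meshgrid(x, y, indexing="ij")
    anchors.append(torch.stack([ay.flatten(), ax.flatten()], dim=-1))
    scaler.append(torch.full(((size // stride) ** 2,), stride))
anchors, scaler = torch.cat(anchors).float(), torch.cat(scaler).float()   # [8400, 2], [8400]

# fake multi-scale head outputs: B x (4*reg_max + class_num) x h x w
feats = [torch.randn(2, 4 * reg_max + class_num, size // s, size // s) for s in strides]

preds = torch.cat([rearrange(p, "B AC h w -> B (h w) AC") for p in feats], dim=1)   # [2, 8400, 144]
preds_anc, preds_cls = torch.split(preds, (4 * reg_max, class_num), dim=-1)
preds_anc = rearrange(preds_anc, "B hw (P R) -> B hw P R", P=4)                     # [2, 8400, 4, 16]

reverse_reg = torch.arange(reg_max, dtype=torch.float32)
pred_ltrb = preds_anc.softmax(dim=-1) @ reverse_reg * scaler.view(1, -1, 1)   # expected bin index * stride
lt, rb = pred_ltrb.chunk(2, dim=-1)
boxes = torch.cat([anchors - lt, anchors + rb], dim=-1)                       # xyXY around each anchor centre

out = torch.cat([preds_cls, boxes], dim=-1)
print(out.shape, preds_anc.shape)   # torch.Size([2, 8400, 84]) torch.Size([2, 8400, 4, 16])

The (decoded predictions, raw distribution) pair returned here is what the loss and NMS call sites below now consume.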

yolo/model/yolo.py

@@ -130,7 +130,7 @@ def create_model(cfg: Config) -> YOLO:
     logger.info("✅ Success load model")
     if cfg.weight:
         if os.path.exists(cfg.weight):
-            model.model.load_state_dict(torch.load(cfg.weight))
+            model.model.load_state_dict(torch.load(cfg.weight), strict=False)
             logger.info("✅ Success load model weight")
         else:
             logger.info(f"🌐 Weight {cfg.weight} not found, try downloading")
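
strict=False matters here because the rebuilt graph contains Anchor2Box parameters (anchors, scaler, reverse_reg) that older checkpoints do not have. A toy illustration of the behaviour, on stand-in modules rather than the YOLO classes:

import torch
from torch import nn

class Old(nn.Module):
    def __init__(self):
        super().__init__()
        self.head = nn.Linear(4, 2)            # stands in for the checkpointed layers

class New(nn.Module):
    def __init__(self):
        super().__init__()
        self.head = nn.Linear(4, 2)
        # a parameter with no counterpart in the old checkpoint, analogous to Anchor2Box's anchors
        self.anchors = nn.Parameter(torch.zeros(8400, 2), requires_grad=False)

state = Old().state_dict()
result = New().load_state_dict(state, strict=False)
print(result.missing_keys)     # ['anchors'] -> tolerated instead of raising, unlike strict=True
print(result.unexpected_keys)  # []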

yolo/tools/format_converters.py

@@ -17,13 +17,15 @@ def convert_weight(old_state_dict, new_state_dict, model_size: int = 38):
             continue
         _, _, conv_name, conv_idx, *details = weight_name.split(".")
         if conv_name == "cv4" or conv_name == "cv5":
-
+            layer_idx = 39
+        else:
+            layer_idx = 37

         if conv_name == "cv2" or conv_name == "cv4":
             conv_task = "anchor_conv"
         if conv_name == "cv3" or conv_name == "cv5":
             conv_task = "class_conv"

-        weight_name = ".".join([
+        weight_name = ".".join([str(layer_idx), "heads", conv_idx, conv_task, *details])
         new_state_dict[weight_name] = weight_value
     return new_state_dict
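
The key rewrite can be traced on a single entry. The old key below is a made-up example in the usual YOLOv9 naming scheme (model.<idx>.cv*....), chosen only to show the string manipulation; the indices 37 and 39 come from the patch and correspond to the two MultiheadDetection layers in the rebuilt graph.

# Illustrative only: how the new key construction rewrites one hypothetical old key.
weight_name = "model.22.cv2.0.0.conv.weight"            # assumed example, not taken from the diff
_, _, conv_name, conv_idx, *details = weight_name.split(".")

layer_idx = 39 if conv_name in ("cv4", "cv5") else 37   # which MultiheadDetection layer the weight belongs to
conv_task = "anchor_conv" if conv_name in ("cv2", "cv4") else "class_conv"

new_name = ".".join([str(layer_idx), "heads", conv_idx, conv_task, *details])
print(new_name)   # 37.heads.0.anchor_conv.0.conv.weight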

yolo/tools/loss_functions.py

@@ -8,12 +8,7 @@ from torch import Tensor, nn
 from torch.nn import BCEWithLogitsLoss

 from yolo.config.config import Config
-from yolo.utils.bounding_box_utils import (
-    AnchorBoxConverter,
-    BoxMatcher,
-    calculate_iou,
-    generate_anchors,
-)
+from yolo.utils.bounding_box_utils import BoxMatcher, calculate_iou, generate_anchors
 from yolo.utils.module_utils import divide_into_chunks


@@ -80,14 +75,15 @@ class YOLOLoss:
         self.strides = cfg.model.anchor.strides
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

-        self.anchors, self.scaler = generate_anchors(self.image_size, self.strides
+        self.anchors, self.scaler = generate_anchors(self.image_size, self.strides)
+        self.anchors = self.anchors.to(device)
+        self.scaler = self.scaler.to(device)

         self.cls = BCELoss()
         self.dfl = DFLoss(self.anchors, self.scaler, self.reg_max)
         self.iou = BoxLoss()

         self.matcher = BoxMatcher(cfg.task.loss.matcher, self.class_num, self.anchors)
-        self.box_converter = AnchorBoxConverter(cfg.model, self.image_size, device)

     def separate_anchor(self, anchors):
         """
@@ -97,16 +93,17 @@ class YOLOLoss:
         anchors_box = anchors_box / self.scaler[None, :, None]
         return anchors_cls, anchors_box

-    def __call__(
+    def __call__(
+        self, predicts_box: List[Tensor], predicts_anc: Tensor, targets: Tensor
+    ) -> Tuple[Tensor, Tensor, Tensor]:
         # Batch_Size x (Anchor + Class) x H x W
         # TODO: check datatype, why targets has a little bit error with origin version
-        predicts, predicts_anc = self.box_converter(predicts)

         # For each predicted targets, assign a best suitable ground truth box.
-        align_targets, valid_masks = self.matcher(targets,
+        align_targets, valid_masks = self.matcher(targets, predicts_box)

         targets_cls, targets_bbox = self.separate_anchor(align_targets)
-        predicts_cls, predicts_bbox = self.separate_anchor(
+        predicts_cls, predicts_bbox = self.separate_anchor(predicts_box)

         cls_norm = targets_cls.sum()
         box_norm = targets_cls.sum(-1)[valid_masks]
@@ -133,9 +130,8 @@ class DualLoss:
     def __call__(self, predicts: List[Tensor], targets: Tensor) -> Tuple[Tensor, Dict[str, Tensor]]:

         # TODO: Need Refactor this region, make it flexible!
-
-
-        main_iou, main_dfl, main_cls = self.loss(predicts[1], targets)
+        aux_iou, aux_dfl, aux_cls = self.loss(*predicts[0], targets)
+        main_iou, main_dfl, main_cls = self.loss(*predicts[1], targets)

         loss_dict = {
             "BoxLoss": self.iou_rate * (aux_iou * self.aux_rate + main_iou),
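
The DualLoss change follows from the model now returning, per branch, the (decoded boxes, raw anchor distribution) pair produced by Anchor2Box: self.loss(*predicts[0], targets) splats that pair into the new __call__(self, predicts_box, predicts_anc, targets) signature. A dummy-data sketch of the calling convention, with a stand-in function rather than the repository classes:

from typing import Tuple
import torch
from torch import Tensor

def yolo_loss(predicts_box: Tensor, predicts_anc: Tensor, targets: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    # stands in for YOLOLoss.__call__; returns placeholder iou/dfl/cls terms
    return predicts_box.mean(), predicts_anc.mean(), targets.mean()

aux_branch = (torch.randn(2, 8400, 84), torch.randn(2, 8400, 4, 16))    # assumed aux_bbox output
main_branch = (torch.randn(2, 8400, 84), torch.randn(2, 8400, 4, 16))   # assumed reg_bbox output
predicts, targets = [aux_branch, main_branch], torch.zeros(2, 5)

aux_iou, aux_dfl, aux_cls = yolo_loss(*predicts[0], targets)    # the * splats (box, anchor) into two arguments
main_iou, main_dfl, main_cls = yolo_loss(*predicts[1], targets)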

yolo/tools/solver.py

@@ -10,7 +10,7 @@ from yolo.model.yolo import YOLO
 from yolo.tools.data_loader import StreamDataLoader, create_dataloader
 from yolo.tools.drawer import draw_bboxes
 from yolo.tools.loss_functions import get_loss_function
-from yolo.utils.bounding_box_utils import
+from yolo.utils.bounding_box_utils import bbox_nms, calculate_map
 from yolo.utils.logging_utils import ProgressTracker
 from yolo.utils.model_utils import (
     ExponentialMovingAverage,
@@ -30,11 +30,8 @@ class ModelTrainer:
         self.progress = ProgressTracker(cfg.name, save_path, cfg.use_wandb)
         self.num_epochs = cfg.task.epoch

-        validation_dataloader = create_dataloader(cfg.task.validation.data, cfg.dataset, cfg.task.validation.task)
-
-        self.validator = ModelValidator(
-            cfg.task.validation, model, save_path, device, self.progress, anchor2box, validation_dataloader
-        )
+        self.validation_dataloader = create_dataloader(cfg.task.validation.data, cfg.dataset, cfg.task.validation.task)
+        self.validator = ModelValidator(cfg.task.validation, model, save_path, device, self.progress)

         if getattr(train_cfg.ema, "enabled", False):
             self.ema = ExponentialMovingAverage(model, decay=train_cfg.ema.decay)
@@ -95,7 +92,7 @@ class ModelTrainer:
             epoch_loss = self.train_one_epoch(dataloader)
             self.progress.finish_one_epoch()

-            self.validator.solve()
+            self.validator.solve(self.validation_dataloader)


 class ModelTester:
@@ -104,7 +101,6 @@ class ModelTester:
         self.device = device
         self.progress = ProgressTracker(cfg, save_path, cfg.use_wandb)

-        self.anchor2box = AnchorBoxConverter(cfg.model, cfg.image_size, device)
         self.nms = cfg.task.nms
         self.idx2label = cfg.class_list
         self.save_path = save_path
@@ -117,8 +113,7 @@ class ModelTester:
             images = images.to(self.device)
             with torch.no_grad():
                 raw_output = self.model(images)
-
-            nms_out = bbox_nms(predict, self.nms)
+            nms_out = bbox_nms(raw_output[-1][0], self.nms)
             draw_bboxes(
                 images[0],
                 nms_out[0],
@@ -144,33 +139,27 @@ class ModelValidator:
         model: YOLO,
         save_path: str,
         device,
+        # TODO: think Progress?
         progress: ProgressTracker,
-        anchor2box,
-        validation_dataloader,
     ):
         self.model = model
         self.device = device
         self.progress = progress
         self.save_path = save_path
-
-        self.anchor2box = anchor2box
         self.nms = validation_cfg.nms
-        self.validdataloader = validation_dataloader

-    def solve(self):
+    def solve(self, dataloader):
         # logger.info("🧪 Start Validation!")
         self.model.eval()
-
+        # TODO: choice mAP metrics?
         iou_thresholds = torch.arange(0.5, 1.0, 0.05)
         map_all = []
-        self.progress.start_one_epoch(len(
-        for data, targets in
+        self.progress.start_one_epoch(len(dataloader))
+        for data, targets in dataloader:
             data, targets = data.to(self.device), targets.to(self.device)
             with torch.no_grad():
                 raw_output = self.model(data)
-
-
-            nms_out = bbox_nms(predict, self.nms)
+            nms_out = bbox_nms(raw_output[-1][0], self.nms)
             for idx, predict in enumerate(nms_out):
                 map_value = calculate_map(predict, targets[idx], iou_thresholds)
                 map_all.append(map_value[0])
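
The indexing raw_output[-1][0] in ModelTester and ModelValidator relies on the new output structure: each layer tagged output: True contributes one entry, and the Anchor2Box entries are (decoded predictions, raw distribution) tuples, with the main reg_bbox branch assumed to come last. That reading of the diff, on fake tensors only:

import torch

aux_bbox = (torch.randn(1, 8400, 84), torch.randn(1, 8400, 4, 16))   # assumed aux branch entry
reg_bbox = (torch.randn(1, 8400, 84), torch.randn(1, 8400, 4, 16))   # assumed main branch entry
raw_output = [aux_bbox, reg_bbox]                                     # order assumed: aux first, main last

decoded_main = raw_output[-1][0]    # B x 8400 x (80 + 4), what bbox_nms receives
print(decoded_main.shape)           # torch.Size([1, 8400, 84])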

yolo/utils/bounding_box_utils.py

@@ -106,16 +106,16 @@ def transform_bbox(bbox: Tensor, indicator="xywh -> xyxy"):
     return bbox.to(dtype=data_type)


-def generate_anchors(image_size: List[int], strides: List[int], device):
+def generate_anchors(image_size: List[int], strides: List[int]):
     W, H = image_size
     anchors = []
     scaler = []
     for stride in strides:
         anchor_num = W // stride * H // stride
-        scaler.append(torch.full((anchor_num,), stride
+        scaler.append(torch.full((anchor_num,), stride))
         shift = stride // 2
-        x = torch.arange(0, W, stride
-        y = torch.arange(0, H, stride
+        x = torch.arange(0, W, stride) + shift
+        y = torch.arange(0, H, stride) + shift
         anchor_x, anchor_y = torch.meshgrid(x, y, indexing="ij")
         anchor = torch.stack([anchor_y.flatten(), anchor_x.flatten()], dim=-1)
         anchors.append(anchor)
@@ -124,44 +124,6 @@ def generate_anchors(image_size: List[int], strides: List[int], device):
     return all_anchors, all_scalers


-class AnchorBoxConverter:
-    def __init__(self, model_cfg: ModelConfig, image_size: List[int], device: torch.device) -> None:
-        self.reg_max = model_cfg.anchor.reg_max
-        self.class_num = model_cfg.class_num
-        self.strides = model_cfg.anchor.strides
-
-        self.anchors, self.scaler = generate_anchors(image_size, self.strides, device)
-        self.reverse_reg = torch.arange(self.reg_max, dtype=torch.float32, device=device)
-
-    def __call__(self, predicts: List[Tensor], with_logits=False) -> Tensor:
-        """
-        args:
-            [B x AnchorClass x h1 x w1, B x AnchorClass x h2 x w2, B x AnchorClass x h3 x w3] // AnchorClass = 4 * 16 + 80
-        return:
-            [B x HW x ClassBbox] // HW = h1*w1 + h2*w2 + h3*w3, ClassBox = 80 + 4 (xyXY)
-        """
-        preds = []
-        for pred in predicts:
-            preds.append(rearrange(pred, "B AC h w -> B (h w) AC"))  # B x AC x h x w -> B x hw x AC
-        preds = torch.concat(preds, dim=1)  # -> B x (H W) x AC
-
-        preds_anc, preds_cls = torch.split(preds, (self.reg_max * 4, self.class_num), dim=-1)
-        preds_anc = rearrange(preds_anc, "B hw (P R) -> B hw P R", P=4)
-        if with_logits:
-            preds_cls = preds_cls.sigmoid()
-
-        pred_LTRB = preds_anc.softmax(dim=-1) @ self.reverse_reg * self.scaler.view(1, -1, 1)
-
-        lt, rb = pred_LTRB.chunk(2, dim=-1)
-        pred_minXY = self.anchors - lt
-        pred_maxXY = self.anchors + rb
-        preds_box = torch.cat([pred_minXY, pred_maxXY], dim=-1)
-
-        predicts = torch.cat([preds_cls, preds_box], dim=-1)
-
-        return predicts, preds_anc
-
-
 class BoxMatcher:
     def __init__(self, cfg: MatcherConfig, class_num: int, anchors: Tensor) -> None:
         self.class_num = class_num
@@ -291,7 +253,8 @@ class BoxMatcher:

 def bbox_nms(predicts: Tensor, nms_cfg: NMSConfig):
     # TODO change function to class or set 80 to class_num instead of a number
-    cls_dist, bbox =
+    cls_dist, bbox = torch.split(predicts, [80, 4], dim=-1)
+    cls_dist = cls_dist.sigmoid()

     # filter class by confidence
     cls_val, cls_idx = cls_dist.max(dim=-1, keepdim=True)
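
A tiny worked example of generate_anchors after the device argument is removed. The body is reproduced standalone below; the final concatenation, which the hunk does not show, is assumed to be a plain torch.cat, and callers such as YOLOLoss now move the result to the right device themselves.

import torch

def generate_anchors(image_size, strides):
    # same per-stride logic as the patched function; the cat at the end is an assumption
    W, H = image_size
    anchors, scaler = [], []
    for stride in strides:
        anchor_num = W // stride * H // stride
        scaler.append(torch.full((anchor_num,), stride))
        shift = stride // 2
        x = torch.arange(0, W, stride) + shift
        y = torch.arange(0, H, stride) + shift
        anchor_x, anchor_y = torch.meshgrid(x, y, indexing="ij")
        anchors.append(torch.stack([anchor_y.flatten(), anchor_x.flatten()], dim=-1))
    return torch.cat(anchors), torch.cat(scaler)

# hypothetical 32x32 input with a single stride of 16 gives a 2x2 grid of cell centres
all_anchors, all_scalers = generate_anchors([32, 32], [16])
print(all_anchors)    # the four centres: [[8, 8], [24, 8], [8, 24], [24, 24]]
print(all_scalers)    # [16, 16, 16, 16]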