henry000 committed on
Commit cbbfcfe · 1 Parent(s): 4b68a08

🚚 [Move] loss function to yolo/utils

also removes duplicate files and the main() entry point of the loss module; pytest tests still need to be written
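Since the commit message flags missing tests, here is a minimal pytest sketch for the relocated module (test file name and layout are hypothetical; the only behavior it checks is the one this commit pins down, namely that get_loss_function is still an unimplemented stub):

# tests/test_loss.py -- hypothetical location; exercises the stub behavior
# guaranteed by this commit.
import pytest

from yolo.utils.loss import get_loss_function


def test_get_loss_function_is_still_a_stub():
    # get_loss_function raises until a real factory is implemented.
    with pytest.raises(NotImplementedError):
        get_loss_function()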

config/config.py DELETED
@@ -1,107 +0,0 @@
- from dataclasses import dataclass
- from typing import Dict, List, Union
-
-
- @dataclass
- class AnchorConfig:
-     reg_max: int
-     strides: List[int]
-
-
- @dataclass
- class Model:
-     anchor: AnchorConfig
-     model: Dict[str, List[Dict[str, Union[Dict, List, int]]]]
-
-
- @dataclass
- class Download:
-     auto: bool
-     path: str
-
-
- @dataclass
- class DataLoaderConfig:
-     batch_size: int
-     shuffle: bool
-     num_workers: int
-     pin_memory: bool
-     image_size: List[int]
-     class_num: int
-
-
- @dataclass
- class OptimizerArgs:
-     lr: float
-     weight_decay: float
-
-
- @dataclass
- class OptimizerConfig:
-     type: str
-     args: OptimizerArgs
-
-
- @dataclass
- class SchedulerArgs:
-     step_size: int
-     gamma: float
-
-
- @dataclass
- class SchedulerConfig:
-     type: str
-     args: SchedulerArgs
-
-
- @dataclass
- class EMAConfig:
-     enabled: bool
-     decay: float
-
-
- @dataclass
- class MatcherConfig:
-     iou: str
-     topk: int
-     factor: Dict[str, int]
-
-
- @dataclass
- class TrainConfig:
-     optimizer: OptimizerConfig
-     scheduler: SchedulerConfig
-     ema: EMAConfig
-     matcher: MatcherConfig
-
-
- @dataclass
- class HyperConfig:
-     data: DataLoaderConfig
-     train: TrainConfig
-
-
- @dataclass
- class Dataset:
-     file_name: str
-     num_files: int
-
-
- @dataclass
- class Datasets:
-     base_url: str
-     images: Dict[str, Dataset]
-
-
- @dataclass
- class Download:
-     auto: bool
-     save_path: str
-     datasets: Datasets
-
-
- @dataclass
- class Config:
-     model: Model
-     download: Download
-     hyper: HyperConfig

config/config.yaml DELETED
@@ -1,11 +0,0 @@
- hydra:
-   run:
-     dir: ./runs
-
- defaults:
-   - data: coco
-   - download: ../data/download
-   - augmentation: ../data/augmentation
-   - model: v7-base
-   - hyper: default
-   - _self_

config/data/augmentation.yaml DELETED
@@ -1,3 +0,0 @@
- Mosaic: 1
- # MixUp: 1
- HorizontalFlip: 0.5

config/data/download.yaml DELETED
@@ -1,21 +0,0 @@
- auto: True
- save_path: data/coco
- datasets:
-   images:
-     base_url: http://images.cocodataset.org/zips/
-     train2017:
-       file_name: train2017
-       file_num: 118287
-     val2017:
-       file_name: val2017
-       file_num: 5000
-     test2017:
-       file_name: test2017
-       file_num: 40670
-   annotations:
-     base_url: http://images.cocodataset.org/annotations/
-     annotations:
-       file_name: annotations_trainval2017
- hydra:
-   run:
-     dir: ./runs

config/hyper/default.yaml DELETED
@@ -1,35 +0,0 @@
- data:
-   batch_size: 4
-   shuffle: True
-   num_workers: 4
-   pin_memory: True
-   class_num: 80
-   image_size: [640, 640]
- train:
-   optimizer:
-     type: Adam
-     args:
-       lr: 0.001
-       weight_decay: 0.0001
-   loss:
-     BCELoss:
-       args:
-     BoxLoss:
-       args:
-         alpha: 0.1
-     DFLoss:
-       args:
-   matcher:
-     iou: CIoU
-     topk: 10
-     factor:
-       iou: 6.0
-       cls: 0.5
-   scheduler:
-     type: StepLR
-     args:
-       step_size: 10
-       gamma: 0.1
-   ema:
-     enabled: true
-     decay: 0.995

utils/converter_json2txt.py DELETED
@@ -1,86 +0,0 @@
- import json
- import os
- from typing import Dict, List, Optional
-
- from tqdm import tqdm
-
-
- def discretize_categories(categories: List[Dict[str, int]]) -> Dict[int, int]:
-     """
-     Maps each unique 'id' in the list of category dictionaries to a sequential integer index.
-     Indices are assigned based on the sorted 'id' values.
-     """
-     sorted_categories = sorted(categories, key=lambda category: category["id"])
-     return {category["id"]: index for index, category in enumerate(sorted_categories)}
-
-
- def process_annotations(
-     image_annotations: Dict[int, List[Dict]],
-     image_info_dict: Dict[int, tuple],
-     output_dir: str,
-     id_to_idx: Optional[Dict[int, int]] = None,
- ) -> None:
-     """
-     Process and save annotations to files, with option to remap category IDs.
-     """
-     for image_id, annotations in tqdm(image_annotations.items(), desc="Processing annotations"):
-         file_path = os.path.join(output_dir, f"{image_id:0>12}.txt")
-         if not annotations:
-             continue
-         with open(file_path, "w") as file:
-             for annotation in annotations:
-                 process_annotation(annotation, image_info_dict[image_id], id_to_idx, file)
-
-
- def process_annotation(annotation: Dict, image_dims: tuple, id_to_idx: Optional[Dict[int, int]], file) -> None:
-     """
-     Convert a single annotation's segmentation and write it to the open file handle.
-     """
-     category_id = annotation["category_id"]
-     segmentation = (
-         annotation["segmentation"][0]
-         if annotation["segmentation"] and isinstance(annotation["segmentation"][0], list)
-         else None
-     )
-
-     if segmentation is None:
-         return
-
-     img_width, img_height = image_dims
-     normalized_segmentation = normalize_segmentation(segmentation, img_width, img_height)
-
-     if id_to_idx:
-         category_id = id_to_idx.get(category_id, category_id)
-
-     file.write(f"{category_id} {' '.join(normalized_segmentation)}\n")
-
-
- def normalize_segmentation(segmentation: List[float], img_width: int, img_height: int) -> List[str]:
-     """
-     Normalize and format segmentation coordinates.
-     """
-     return [f"{x/img_width:.6f}" if i % 2 == 0 else f"{x/img_height:.6f}" for i, x in enumerate(segmentation)]
-
-
- def convert_annotations(json_file: str, output_dir: str) -> None:
-     """
-     Load annotation data from a JSON file and process all annotations.
-     """
-     with open(json_file) as file:
-         data = json.load(file)
-
-     os.makedirs(output_dir, exist_ok=True)
-
-     image_info_dict = {img["id"]: (img["width"], img["height"]) for img in data.get("images", [])}
-     id_to_idx = discretize_categories(data.get("categories", [])) if "categories" in data else None
-     image_annotations = {img_id: [] for img_id in image_info_dict}
-
-     for annotation in data.get("annotations", []):
-         if not annotation.get("iscrowd", False):
-             image_annotations[annotation["image_id"]].append(annotation)
-
-     process_annotations(image_annotations, image_info_dict, output_dir, id_to_idx)
-
-
- convert_annotations("./data/coco/annotations/instances_train2017.json", "./data/coco/labels/train2017/")
- convert_annotations("./data/coco/annotations/instances_val2017.json", "./data/coco/labels/val2017/")

utils/data_augment.py DELETED
@@ -1,125 +0,0 @@
- import numpy as np
- import torch
- from PIL import Image
- from torchvision.transforms import functional as TF
-
-
- class Compose:
-     """Composes several transforms together."""
-
-     def __init__(self, transforms, image_size: int = 640):
-         self.transforms = transforms
-         self.image_size = image_size
-
-         for transform in self.transforms:
-             if hasattr(transform, "set_parent"):
-                 transform.set_parent(self)
-
-     def __call__(self, image, boxes):
-         for transform in self.transforms:
-             image, boxes = transform(image, boxes)
-         return image, boxes
-
-
- class HorizontalFlip:
-     """Randomly horizontally flips the image along with the bounding boxes."""
-
-     def __init__(self, prob=0.5):
-         self.prob = prob
-
-     def __call__(self, image, boxes):
-         if torch.rand(1) < self.prob:
-             image = TF.hflip(image)
-             boxes[:, [1, 3]] = 1 - boxes[:, [3, 1]]
-         return image, boxes
-
-
- class VerticalFlip:
-     """Randomly vertically flips the image along with the bounding boxes."""
-
-     def __init__(self, prob=0.5):
-         self.prob = prob
-
-     def __call__(self, image, boxes):
-         if torch.rand(1) < self.prob:
-             image = TF.vflip(image)
-             boxes[:, [2, 4]] = 1 - boxes[:, [4, 2]]
-         return image, boxes
-
-
- class Mosaic:
-     """Applies the Mosaic augmentation to a batch of images and their corresponding boxes."""
-
-     def __init__(self, prob=0.5):
-         self.prob = prob
-         self.parent = None
-
-     def set_parent(self, parent):
-         self.parent = parent
-
-     def __call__(self, image, boxes):
-         if torch.rand(1) >= self.prob:
-             return image, boxes
-
-         assert self.parent is not None, "Parent is not set. Mosaic cannot retrieve image size."
-
-         img_sz = self.parent.image_size  # Assuming `image_size` is defined in parent
-         more_data = self.parent.get_more_data(3)  # get 3 more images randomly
-
-         data = [(image, boxes)] + more_data
-         mosaic_image = Image.new("RGB", (2 * img_sz, 2 * img_sz))
-         vectors = np.array([(-1, -1), (0, -1), (-1, 0), (0, 0)])
-         center = np.array([img_sz, img_sz])
-         all_labels = []
-
-         for (image, boxes), vector in zip(data, vectors):
-             this_w, this_h = image.size
-             coord = tuple(center + vector * np.array([this_w, this_h]))
-
-             mosaic_image.paste(image, coord)
-             xmin, ymin, xmax, ymax = boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]
-             xmin = (xmin * this_w + coord[0]) / (2 * img_sz)
-             xmax = (xmax * this_w + coord[0]) / (2 * img_sz)
-             ymin = (ymin * this_h + coord[1]) / (2 * img_sz)
-             ymax = (ymax * this_h + coord[1]) / (2 * img_sz)
-
-             adjusted_boxes = torch.stack([boxes[:, 0], xmin, ymin, xmax, ymax], dim=1)
-             all_labels.append(adjusted_boxes)
-
-         all_labels = torch.cat(all_labels, dim=0)
-         mosaic_image = mosaic_image.resize((img_sz, img_sz))
-         return mosaic_image, all_labels
-
-
- class MixUp:
-     """Applies the MixUp augmentation to a pair of images and their corresponding boxes."""
-
-     def __init__(self, prob=0.5, alpha=1.0):
-         self.alpha = alpha
-         self.prob = prob
-         self.parent = None
-
-     def set_parent(self, parent):
-         """Set the parent dataset object for accessing dataset methods."""
-         self.parent = parent
-
-     def __call__(self, image, boxes):
-         if torch.rand(1) >= self.prob:
-             return image, boxes
-
-         assert self.parent is not None, "Parent is not set. MixUp cannot retrieve additional data."
-
-         # Retrieve another image and its boxes randomly from the dataset
-         image2, boxes2 = self.parent.get_more_data()[0]
-
-         # Calculate the mixup lambda parameter
-         lam = np.random.beta(self.alpha, self.alpha) if self.alpha > 0 else 0.5
-
-         # Mix images
-         image1, image2 = TF.to_tensor(image), TF.to_tensor(image2)
-         mixed_image = lam * image1 + (1 - lam) * image2
-
-         # Mix bounding boxes
-         mixed_boxes = torch.cat([lam * boxes, (1 - lam) * boxes2])
-
-         return TF.to_pil_image(mixed_image), mixed_boxes

utils/dataloader.py DELETED
@@ -1,206 +0,0 @@
- import os
- from os import path
- from typing import List, Tuple, Union
-
- import diskcache as dc
- import hydra
- import numpy as np
- import torch
- from loguru import logger
- from PIL import Image
- from torch.utils.data import DataLoader, Dataset
- from torchvision.transforms import functional as TF
- from tqdm.rich import tqdm
-
- from tools.dataset_helper import (
-     create_image_info_dict,
-     find_labels_path,
-     get_scaled_segmentation,
- )
- from utils.data_augment import Compose, HorizontalFlip, MixUp, Mosaic, VerticalFlip
- from utils.drawer import draw_bboxes
-
-
- class YoloDataset(Dataset):
-     def __init__(self, config: dict, phase: str = "train2017", image_size: int = 640):
-         dataset_cfg = config.data
-         augment_cfg = config.augmentation
-         phase_name = dataset_cfg.get(phase, phase)
-         self.image_size = image_size
-
-         transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
-         self.transform = Compose(transforms, self.image_size)
-         self.transform.get_more_data = self.get_more_data
-         self.data = self.load_data(dataset_cfg.path, phase_name)
-
-     def load_data(self, dataset_path, phase_name):
-         """
-         Loads data from a cache or generates a new cache for a specific dataset phase.
-
-         Parameters:
-             dataset_path (str): The root path to the dataset directory.
-             phase_name (str): The specific phase of the dataset (e.g., 'train', 'test') to load or generate data for.
-
-         Returns:
-             dict: The loaded data from the cache for the specified phase.
-         """
-         cache_path = path.join(dataset_path, ".cache")
-         cache = dc.Cache(cache_path)
-         data = cache.get(phase_name)
-
-         if data is None:
-             logger.info("Generating {} cache", phase_name)
-             data = self.filter_data(dataset_path, phase_name)
-             cache[phase_name] = data
-
-         cache.close()
-         logger.info("📦 Loaded {} cache", phase_name)
-         data = cache[phase_name]
-         return data
-
-     def filter_data(self, dataset_path: str, phase_name: str) -> list:
-         """
-         Filters and collects dataset information by pairing images with their corresponding labels.
-
-         Parameters:
-             images_path (str): Path to the directory containing image files.
-             labels_path (str): Path to the directory containing label files.
-
-         Returns:
-             list: A list of tuples, each containing the path to an image file and its associated segmentation as a tensor.
-         """
-         images_path = path.join(dataset_path, "images", phase_name)
-         labels_path, data_type = find_labels_path(dataset_path, phase_name)
-         images_list = sorted(os.listdir(images_path))
-         if data_type == "json":
-             annotations_index, image_info_dict = create_image_info_dict(labels_path)
-
-         data = []
-         valid_inputs = 0
-         for image_name in tqdm(images_list, desc="Filtering data"):
-             if not image_name.lower().endswith((".jpg", ".jpeg", ".png")):
-                 continue
-             image_id, _ = path.splitext(image_name)
-
-             if data_type == "json":
-                 image_info = image_info_dict.get(image_id, None)
-                 if image_info is None:
-                     continue
-                 annotations = annotations_index.get(image_info["id"], [])
-                 image_seg_annotations = get_scaled_segmentation(annotations, image_info)
-                 if not image_seg_annotations:
-                     continue
-
-             elif data_type == "txt":
-                 label_path = path.join(labels_path, f"{image_id}.txt")
-                 if not path.isfile(label_path):
-                     continue
-                 with open(label_path, "r") as file:
-                     image_seg_annotations = [list(map(float, line.strip().split())) for line in file]
-
-             labels = self.load_valid_labels(image_id, image_seg_annotations)
-             if labels is not None:
-                 img_path = path.join(images_path, image_name)
-                 data.append((img_path, labels))
-                 valid_inputs += 1
-
-         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
-         return data
-
-     def load_valid_labels(self, label_path, seg_data_one_img) -> Union[torch.Tensor, None]:
-         """
-         Loads and validates bounding box data is [0, 1] from a label file.
-
-         Parameters:
-             label_path (str): The filepath to the label file containing bounding box data.
-
-         Returns:
-             torch.Tensor or None: A tensor of all valid bounding boxes if any are found; otherwise, None.
-         """
-         bboxes = []
-         for seg_data in seg_data_one_img:
-             cls = seg_data[0]
-             points = np.array(seg_data[1:]).reshape(-1, 2)
-             valid_points = points[(points >= 0) & (points <= 1)].reshape(-1, 2)
-             if valid_points.size > 1:
-                 bbox = torch.tensor([cls, *valid_points.min(axis=0), *valid_points.max(axis=0)])
-                 bboxes.append(bbox)
-
-         if bboxes:
-             return torch.stack(bboxes)
-         else:
-             logger.warning("No valid BBox in {}", label_path)
-             return None
-
-     def get_data(self, idx):
-         img_path, bboxes = self.data[idx]
-         img = Image.open(img_path).convert("RGB")
-         return img, bboxes
-
-     def get_more_data(self, num: int = 1):
-         indices = torch.randint(0, len(self), (num,))
-         return [self.get_data(idx) for idx in indices]
-
-     def __getitem__(self, idx) -> Union[Image.Image, torch.Tensor]:
-         img, bboxes = self.get_data(idx)
-         if self.transform:
-             img, bboxes = self.transform(img, bboxes)
-         img = TF.to_tensor(img)
-         return img, bboxes
-
-     def __len__(self) -> int:
-         return len(self.data)
-
-
- class YoloDataLoader(DataLoader):
-     def __init__(self, config: dict):
-         """Initializes the YoloDataLoader with hydra-config files."""
-         hyper = config.hyper.data
-         dataset = YoloDataset(config)
-
-         super().__init__(
-             dataset,
-             batch_size=hyper.batch_size,
-             shuffle=hyper.shuffle,
-             num_workers=hyper.num_workers,
-             pin_memory=hyper.pin_memory,
-             collate_fn=self.collate_fn,
-         )
-
-     def collate_fn(self, batch: List[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, List[torch.Tensor]]:
-         """
-         A collate function to handle batching of images and their corresponding targets.
-
-         Args:
-             batch (list of tuples): Each tuple contains:
-                 - image (torch.Tensor): The image tensor.
-                 - labels (torch.Tensor): The tensor of labels for the image.
-
-         Returns:
-             Tuple[torch.Tensor, List[torch.Tensor]]: A tuple containing:
-                 - A tensor of batched images.
-                 - A list of tensors, each corresponding to bboxes for each image in the batch.
-         """
-         images = torch.stack([item[0] for item in batch])
-         targets = [item[1] for item in batch]
-         return images, targets
-
-
- def get_dataloader(config):
-     return YoloDataLoader(config)
-
-
- @hydra.main(config_path="../config", config_name="config", version_base=None)
- def main(cfg):
-     dataloader = get_dataloader(cfg)
-     draw_bboxes(*next(iter(dataloader)))
-
-
- if __name__ == "__main__":
-     import sys
-
-     sys.path.append("./")
-     from tools.log_helper import custom_logger
-
-     custom_logger()
-     main()

utils/drawer.py DELETED
@@ -1,41 +0,0 @@
- from typing import List, Union
-
- import torch
- from loguru import logger
- from PIL import Image, ImageDraw, ImageFont
- from torchvision.transforms.functional import to_pil_image
-
-
- def draw_bboxes(img: Union[Image.Image, torch.Tensor], bboxes: List[List[Union[int, float]]]):
-     """
-     Draw bounding boxes on an image.
-
-     Args:
-     - img (PIL Image or torch.Tensor): Image on which to draw the bounding boxes.
-     - bboxes (List of Lists/Tensors): Bounding boxes with [class_id, x_min, y_min, x_max, y_max],
-       where coordinates are normalized [0, 1].
-     """
-     # Convert tensor image to PIL Image if necessary
-     if isinstance(img, torch.Tensor):
-         if img.dim() > 3:
-             logger.info("Multi-frame tensor detected, using the first image.")
-             img = img[0]
-             bboxes = bboxes[0]
-         img = to_pil_image(img)
-
-     draw = ImageDraw.Draw(img)
-     width, height = img.size
-     font = ImageFont.load_default(30)
-
-     for bbox in bboxes:
-         class_id, x_min, y_min, x_max, y_max = bbox
-         x_min = x_min * width
-         x_max = x_max * width
-         y_min = y_min * height
-         y_max = y_max * height
-         shape = [(x_min, y_min), (x_max, y_max)]
-         draw.rectangle(shape, outline="red", width=3)
-         draw.text((x_min, y_min), str(int(class_id)), font=font, fill="blue")
-
-     img.save("visualize.jpg")  # Save the image with annotations
-     logger.info("Saved visualize image at visualize.png")

utils/get_dataset.py DELETED
@@ -1,84 +0,0 @@
- import os
- import zipfile
-
- import requests
- from hydra import main
- from loguru import logger
- from tqdm import tqdm
-
-
- def download_file(url, destination):
-     """
-     Downloads a file from the specified URL to the destination path with progress logging.
-     """
-     logger.info(f"Downloading {os.path.basename(destination)}...")
-     with requests.get(url, stream=True) as response:
-         response.raise_for_status()
-         total_size = int(response.headers.get("content-length", 0))
-         progress = tqdm(total=total_size, unit="iB", unit_scale=True, desc=os.path.basename(destination), leave=True)
-
-         with open(destination, "wb") as file:
-             for data in response.iter_content(chunk_size=1024 * 1024):  # 1 MB chunks
-                 file.write(data)
-                 progress.update(len(data))
-         progress.close()
-     logger.info("Download completed.")
-
-
- def unzip_file(source, destination):
-     """
-     Extracts a ZIP file to the specified directory and removes the ZIP file after extraction.
-     """
-     logger.info(f"Unzipping {os.path.basename(source)}...")
-     with zipfile.ZipFile(source, "r") as zip_ref:
-         zip_ref.extractall(destination)
-     os.remove(source)
-     logger.info(f"Removed {source}.")
-
-
- def check_files(directory, expected_count=None):
-     """
-     Returns True if the number of files in the directory matches expected_count, False otherwise.
-     """
-     files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
-     return len(files) == expected_count if expected_count is not None else bool(files)
-
-
- @main(config_path="../config/data", config_name="download", version_base=None)
- def prepare_dataset(cfg):
-     """
-     Prepares dataset by downloading and unzipping if necessary.
-     """
-     data_dir = cfg.save_path
-     for data_type, settings in cfg.datasets.items():
-         base_url = settings["base_url"]
-         for dataset_type, dataset_args in settings.items():
-             if dataset_type == "base_url":
-                 continue  # Skip the base_url entry
-             file_name = f"{dataset_args.get('file_name', dataset_type)}.zip"
-             url = f"{base_url}{file_name}"
-             local_zip_path = os.path.join(data_dir, file_name)
-             extract_to = os.path.join(data_dir, data_type) if data_type != "annotations" else data_dir
-             final_place = os.path.join(extract_to, dataset_type)
-
-             os.makedirs(extract_to, exist_ok=True)
-             if check_files(final_place, dataset_args.get("file_num")):
-                 logger.info(f"Dataset {dataset_type} already verified.")
-                 continue
-
-             if not os.path.exists(local_zip_path):
-                 download_file(url, local_zip_path)
-             unzip_file(local_zip_path, extract_to)
-
-             if not check_files(final_place, dataset_args.get("file_num")):
-                 logger.error(f"Error verifying the {dataset_type} dataset after extraction.")
-
-
- if __name__ == "__main__":
-     import sys
-
-     sys.path.append("./")
-     from tools.log_helper import custom_logger
-
-     custom_logger()
-     prepare_dataset()

utils/loss.py DELETED
@@ -1,184 +0,0 @@
- import sys
- import time
- from typing import Any, List
-
- import numpy as np
- import torch
- import torch.nn.functional as F
- from einops import rearrange
- from hydra import main
- from loguru import logger
- from torch import Tensor, nn
- from torch.nn import BCEWithLogitsLoss
-
- sys.path.append("./")
- from config.config import Config
- from tools.bbox_helper import BoxMatcher, calculate_iou, make_anchor, transform_bbox
-
-
- def get_loss_function(*args, **kwargs):
-     raise NotImplementedError
-
-
- class BCELoss(nn.Module):
-     def __init__(self) -> None:
-         super().__init__()
-         self.bce = BCEWithLogitsLoss(pos_weight=torch.tensor([1.0], device=torch.device("cuda")), reduction="none")
-
-     def forward(self, predicts_cls: Tensor, targets_cls: Tensor, cls_norm: Tensor) -> Any:
-         return self.bce(predicts_cls, targets_cls).sum() / cls_norm
-
-
- class BoxLoss(nn.Module):
-     def __init__(self) -> None:
-         super().__init__()
-
-     def forward(
-         self, predicts_bbox: Tensor, targets_bbox: Tensor, valid_masks: Tensor, box_norm: Tensor, cls_norm: Tensor
-     ) -> Any:
-         valid_bbox = valid_masks[..., None].expand(-1, -1, 4)
-         picked_predict = predicts_bbox[valid_bbox].view(-1, 4)
-         picked_targets = targets_bbox[valid_bbox].view(-1, 4)
-
-         iou = calculate_iou(picked_predict, picked_targets, "ciou").diag()
-         loss_iou = 1.0 - iou
-         loss_iou = (loss_iou * box_norm).sum() / cls_norm
-         return loss_iou
-
-
- class DFLoss(nn.Module):
-     def __init__(self, anchors: Tensor, scaler: Tensor, reg_max: int) -> None:
-         super().__init__()
-         self.anchors = anchors
-         self.scaler = scaler
-         self.reg_max = reg_max
-
-     def forward(
-         self, predicts_anc: Tensor, targets_bbox: Tensor, valid_masks: Tensor, box_norm: Tensor, cls_norm: Tensor
-     ) -> Any:
-         valid_bbox = valid_masks[..., None].expand(-1, -1, 4)
-         bbox_lt, bbox_rb = targets_bbox.chunk(2, -1)
-         anchors_norm = (self.anchors / self.scaler[:, None])[None]
-         targets_dist = torch.cat(((anchors_norm - bbox_lt), (bbox_rb - anchors_norm)), -1).clamp(0, self.reg_max - 1.01)
-         picked_targets = targets_dist[valid_bbox].view(-1)
-         picked_predict = predicts_anc[valid_bbox].view(-1, self.reg_max)
-
-         label_left, label_right = picked_targets.floor(), picked_targets.floor() + 1
-         weight_left, weight_right = label_right - picked_targets, picked_targets - label_left
-
-         loss_left = F.cross_entropy(picked_predict, label_left.to(torch.long), reduction="none")
-         loss_right = F.cross_entropy(picked_predict, label_right.to(torch.long), reduction="none")
-         loss_dfl = loss_left * weight_left + loss_right * weight_right
-         loss_dfl = loss_dfl.view(-1, 4).mean(-1)
-         loss_dfl = (loss_dfl * box_norm).sum() / cls_norm
-         return loss_dfl
-
-
- class YOLOLoss:
-     def __init__(self, cfg: Config) -> None:
-         self.reg_max = cfg.model.anchor.reg_max
-         self.class_num = cfg.hyper.data.class_num
-         self.image_size = list(cfg.hyper.data.image_size)
-         self.strides = cfg.model.anchor.strides
-         device = torch.device("cuda")
-
-         self.reverse_reg = torch.arange(self.reg_max, dtype=torch.float16, device=device)
-         self.scale_up = torch.tensor(self.image_size * 2, device=device)
-
-         self.anchors, self.scaler = make_anchor(self.image_size, self.strides, device)
-
-         self.cls = BCELoss()
-         self.dfl = DFLoss(self.anchors, self.scaler, self.reg_max)
-         self.iou = BoxLoss()
-
-         self.matcher = BoxMatcher(cfg.hyper.train.matcher, self.class_num, self.anchors)
-
-     def parse_predicts(self, predicts: List[Tensor]) -> Tensor:
-         """
-         args:
-             [B x AnchorClass x h1 x w1, B x AnchorClass x h2 x w2, B x AnchorClass x h3 x w3] // AnchorClass = 4 * 16 + 80
-         return:
-             [B x HW x ClassBbox] // HW = h1*w1 + h2*w2 + h3*w3, ClassBox = 80 + 4 (xyXY)
-         """
-         preds = []
-         for pred in predicts:
-             preds.append(rearrange(pred, "B AC h w -> B (h w) AC"))  # B x AC x h x w-> B x hw x AC
-         preds = torch.concat(preds, dim=1)  # -> B x (H W) x AC
-
-         preds_anc, preds_cls = torch.split(preds, (self.reg_max * 4, self.class_num), dim=-1)
-         preds_anc = rearrange(preds_anc, "B hw (P R)-> B hw P R", P=4)
-
-         pred_LTRB = preds_anc.softmax(dim=-1) @ self.reverse_reg * self.scaler.view(1, -1, 1)
-
-         lt, rb = pred_LTRB.chunk(2, dim=-1)
-         pred_minXY = self.anchors - lt
-         pred_maxXY = self.anchors + rb
-         predicts = torch.cat([preds_cls, pred_minXY, pred_maxXY], dim=-1)
-
-         return predicts, preds_anc
-
-     def parse_targets(self, targets: Tensor, batch_size: int = 16) -> List[Tensor]:
-         """
-         return List:
-         """
-         targets[:, 2:] = transform_bbox(targets[:, 2:], "xycwh -> xyxy") * self.scale_up
-         bbox_num = targets[:, 0].int().bincount()
-         batch_targets = torch.zeros(batch_size, bbox_num.max(), 5, device=targets.device)
-         for instance_idx, bbox_num in enumerate(bbox_num):
-             instance_targets = targets[targets[:, 0] == instance_idx]
-             batch_targets[instance_idx, :bbox_num] = instance_targets[:, 1:].detach()
-         return batch_targets
-
-     def separate_anchor(self, anchors):
-         """
-         separate anchor and bbouding box
-         """
-         anchors_cls, anchors_box = torch.split(anchors, (self.class_num, 4), dim=-1)
-         anchors_box = anchors_box / self.scaler[None, :, None]
-         return anchors_cls, anchors_box
-
-     @torch.autocast("cuda")
-     def __call__(self, predicts: List[Tensor], targets: Tensor) -> Tensor:
-         # Batch_Size x (Anchor + Class) x H x W
-         tlist = [time.time()]
-         # TODO: check datatype, why targets has a little bit error with origin version
-         predicts, predicts_anc = self.parse_predicts(predicts[0])
-         targets = self.parse_targets(targets)
-
-         align_targets, valid_masks = self.matcher(targets, predicts)
-         # calculate loss between with instance and predict
-
-         targets_cls, targets_bbox = self.separate_anchor(align_targets)
-         predicts_cls, predicts_bbox = self.separate_anchor(predicts)
-
-         cls_norm = targets_cls.sum()
-         box_norm = targets_cls.sum(-1)[valid_masks]
-
-         ## -- CLS -- ##
-         loss_cls = self.cls(predicts_cls, targets_cls, cls_norm)
-         ## -- IOU -- ##
-         loss_iou = self.iou(predicts_bbox, targets_bbox, valid_masks, box_norm, cls_norm)
-         ## -- DFL -- ##
-         loss_dfl = self.dfl(predicts_anc, targets_bbox, valid_masks, box_norm, cls_norm)
-
-         logger.info("Loss IoU: {:.5f}, DFL: {:.5f}, CLS: {:.5f}", loss_iou, loss_dfl, loss_cls)
-         tlist.append(time.time())
-         logger.info(f"Calculate Loss Run Time {np.diff(np.array(tlist)) * 1e3} ms")
-
-
- @main(config_path="../config", config_name="config", version_base=None)
- def main(cfg):
-     losser = YOLOLoss(cfg)
-     targets = torch.load("targets.pt")
-     predicts = torch.load("predicts.pt")
-     losser(predicts, targets)
-
-
- if __name__ == "__main__":
-     import sys
-
-     sys.path.append("./")
-     from tools.log_helper import custom_logger
-
-     custom_logger()
-     main()

yolo/config/config.py CHANGED
@@ -2,9 +2,15 @@ from dataclasses import dataclass
  from typing import Dict, List, Union


+ @dataclass
+ class AnchorConfig:
+     reg_max: int
+     strides: List[int]
+
+
  @dataclass
  class Model:
-     anchor: List[List[int]]
+     anchor: AnchorConfig
      model: Dict[str, List[Dict[str, Union[Dict, List, int]]]]


@@ -20,6 +26,8 @@ class DataLoaderConfig:
      shuffle: bool
      num_workers: int
      pin_memory: bool
+     image_size: List[int]
+     class_num: int


  @dataclass
@@ -52,11 +60,19 @@ class EMAConfig:
      decay: float


+ @dataclass
+ class MatcherConfig:
+     iou: str
+     topk: int
+     factor: Dict[str, int]
+
+
  @dataclass
  class TrainConfig:
      optimizer: OptimizerConfig
      scheduler: SchedulerConfig
      ema: EMAConfig
+     matcher: MatcherConfig


  @dataclass

yolo/config/hyper/default.yaml CHANGED
@@ -3,12 +3,28 @@ data:
    shuffle: True
    num_workers: 4
    pin_memory: True
+   class_num: 80
+   image_size: [640, 640]
  train:
    optimizer:
      type: Adam
      args:
        lr: 0.001
        weight_decay: 0.0001
+   loss:
+     BCELoss:
+       args:
+     BoxLoss:
+       args:
+         alpha: 0.1
+     DFLoss:
+       args:
+   matcher:
+     iou: CIoU
+     topk: 10
+     factor:
+       iou: 6.0
+       cls: 0.5
    scheduler:
      type: StepLR
      args:

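The new matcher block above is what populates the MatcherConfig dataclass added to yolo/config/config.py. A quick hand check that the YAML and the dataclass agree (a sketch, not project code; note the dataclass annotates factor as Dict[str, int] while the YAML values 6.0 and 0.5 are floats, so the annotation may eventually want to be Dict[str, float]):

from yolo.config.config import MatcherConfig

# Mirror the matcher block from yolo/config/hyper/default.yaml by hand.
# Dataclasses do not enforce annotations at runtime, so the float factors
# are accepted even though the field is annotated Dict[str, int].
matcher = MatcherConfig(iou="CIoU", topk=10, factor={"iou": 6.0, "cls": 0.5})
assert matcher.topk == 10 and matcher.factor["cls"] == 0.5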
yolo/tools/bbox_helper.py CHANGED
@@ -5,7 +5,7 @@ import torch
  import torch.nn.functional as F
  from torch import Tensor

- from config.config import MatcherConfig
+ from yolo.config.config import MatcherConfig


  def calculate_iou(bbox1, bbox2, metrics="iou") -> Tensor:

yolo/utils/loss.py CHANGED
@@ -1,2 +1,166 @@
+ import time
+ from typing import Any, List, Tuple
+
+ import torch
+ import torch.nn.functional as F
+ from einops import rearrange
+ from hydra import main
+ from loguru import logger
+ from torch import Tensor, nn
+ from torch.nn import BCEWithLogitsLoss
+
+ from yolo.config.config import Config
+ from yolo.tools.bbox_helper import (
+     BoxMatcher,
+     calculate_iou,
+     make_anchor,
+     transform_bbox,
+ )
+
+
  def get_loss_function(*args, **kwargs):
      raise NotImplementedError
+
+
+ class BCELoss(nn.Module):
+     def __init__(self) -> None:
+         super().__init__()
+         self.bce = BCEWithLogitsLoss(pos_weight=torch.tensor([1.0], device=torch.device("cuda")), reduction="none")
+
+     def forward(self, predicts_cls: Tensor, targets_cls: Tensor, cls_norm: Tensor) -> Any:
+         return self.bce(predicts_cls, targets_cls).sum() / cls_norm
+
+
+ class BoxLoss(nn.Module):
+     def __init__(self) -> None:
+         super().__init__()
+
+     def forward(
+         self, predicts_bbox: Tensor, targets_bbox: Tensor, valid_masks: Tensor, box_norm: Tensor, cls_norm: Tensor
+     ) -> Any:
+         valid_bbox = valid_masks[..., None].expand(-1, -1, 4)
+         picked_predict = predicts_bbox[valid_bbox].view(-1, 4)
+         picked_targets = targets_bbox[valid_bbox].view(-1, 4)
+
+         iou = calculate_iou(picked_predict, picked_targets, "ciou").diag()
+         loss_iou = 1.0 - iou
+         loss_iou = (loss_iou * box_norm).sum() / cls_norm
+         return loss_iou
+
+
+ class DFLoss(nn.Module):
+     def __init__(self, anchors: Tensor, scaler: Tensor, reg_max: int) -> None:
+         super().__init__()
+         self.anchors = anchors
+         self.scaler = scaler
+         self.reg_max = reg_max
+
+     def forward(
+         self, predicts_anc: Tensor, targets_bbox: Tensor, valid_masks: Tensor, box_norm: Tensor, cls_norm: Tensor
+     ) -> Any:
+         valid_bbox = valid_masks[..., None].expand(-1, -1, 4)
+         bbox_lt, bbox_rb = targets_bbox.chunk(2, -1)
+         anchors_norm = (self.anchors / self.scaler[:, None])[None]
+         targets_dist = torch.cat(((anchors_norm - bbox_lt), (bbox_rb - anchors_norm)), -1).clamp(0, self.reg_max - 1.01)
+         picked_targets = targets_dist[valid_bbox].view(-1)
+         picked_predict = predicts_anc[valid_bbox].view(-1, self.reg_max)
+
+         label_left, label_right = picked_targets.floor(), picked_targets.floor() + 1
+         weight_left, weight_right = label_right - picked_targets, picked_targets - label_left
+
+         loss_left = F.cross_entropy(picked_predict, label_left.to(torch.long), reduction="none")
+         loss_right = F.cross_entropy(picked_predict, label_right.to(torch.long), reduction="none")
+         loss_dfl = loss_left * weight_left + loss_right * weight_right
+         loss_dfl = loss_dfl.view(-1, 4).mean(-1)
+         loss_dfl = (loss_dfl * box_norm).sum() / cls_norm
+         return loss_dfl
+
+
+ class YOLOLoss:
+     def __init__(self, cfg: Config) -> None:
+         self.reg_max = cfg.model.anchor.reg_max
+         self.class_num = cfg.hyper.data.class_num
+         self.image_size = list(cfg.hyper.data.image_size)
+         self.strides = cfg.model.anchor.strides
+         device = torch.device("cuda")
+
+         self.reverse_reg = torch.arange(self.reg_max, dtype=torch.float16, device=device)
+         self.scale_up = torch.tensor(self.image_size * 2, device=device)
+
+         self.anchors, self.scaler = make_anchor(self.image_size, self.strides, device)
+
+         self.cls = BCELoss()
+         self.dfl = DFLoss(self.anchors, self.scaler, self.reg_max)
+         self.iou = BoxLoss()
+
+         self.matcher = BoxMatcher(cfg.hyper.train.matcher, self.class_num, self.anchors)
+
+     def parse_predicts(self, predicts: List[Tensor]) -> Tensor:
+         """
+         args:
+             [B x AnchorClass x h1 x w1, B x AnchorClass x h2 x w2, B x AnchorClass x h3 x w3] // AnchorClass = 4 * 16 + 80
+         return:
+             [B x HW x ClassBbox] // HW = h1*w1 + h2*w2 + h3*w3, ClassBox = 80 + 4 (xyXY)
+         """
+         preds = []
+         for pred in predicts:
+             preds.append(rearrange(pred, "B AC h w -> B (h w) AC"))  # B x AC x h x w-> B x hw x AC
+         preds = torch.concat(preds, dim=1)  # -> B x (H W) x AC
+
+         preds_anc, preds_cls = torch.split(preds, (self.reg_max * 4, self.class_num), dim=-1)
+         preds_anc = rearrange(preds_anc, "B hw (P R)-> B hw P R", P=4)
+
+         pred_LTRB = preds_anc.softmax(dim=-1) @ self.reverse_reg * self.scaler.view(1, -1, 1)
+
+         lt, rb = pred_LTRB.chunk(2, dim=-1)
+         pred_minXY = self.anchors - lt
+         pred_maxXY = self.anchors + rb
+         predicts = torch.cat([preds_cls, pred_minXY, pred_maxXY], dim=-1)
+
+         return predicts, preds_anc
+
+     def parse_targets(self, targets: Tensor, batch_size: int = 16) -> List[Tensor]:
+         """
+         return List:
+         """
+         targets[:, 2:] = transform_bbox(targets[:, 2:], "xycwh -> xyxy") * self.scale_up
+         bbox_num = targets[:, 0].int().bincount()
+         batch_targets = torch.zeros(batch_size, bbox_num.max(), 5, device=targets.device)
+         for instance_idx, bbox_num in enumerate(bbox_num):
+             instance_targets = targets[targets[:, 0] == instance_idx]
+             batch_targets[instance_idx, :bbox_num] = instance_targets[:, 1:].detach()
+         return batch_targets
+
+     def separate_anchor(self, anchors):
+         """
+         separate anchor and bbouding box
+         """
+         anchors_cls, anchors_box = torch.split(anchors, (self.class_num, 4), dim=-1)
+         anchors_box = anchors_box / self.scaler[None, :, None]
+         return anchors_cls, anchors_box
+
+     @torch.autocast("cuda")
+     def __call__(self, predicts: List[Tensor], targets: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
+         # Batch_Size x (Anchor + Class) x H x W
+         # TODO: check datatype, why targets has a little bit error with origin version
+         predicts, predicts_anc = self.parse_predicts(predicts[0])
+         targets = self.parse_targets(targets)
+
+         align_targets, valid_masks = self.matcher(targets, predicts)
+         # calculate loss between with instance and predict
+
+         targets_cls, targets_bbox = self.separate_anchor(align_targets)
+         predicts_cls, predicts_bbox = self.separate_anchor(predicts)
+
+         cls_norm = targets_cls.sum()
+         box_norm = targets_cls.sum(-1)[valid_masks]
+
+         ## -- CLS -- ##
+         loss_cls = self.cls(predicts_cls, targets_cls, cls_norm)
+         ## -- IOU -- ##
+         loss_iou = self.iou(predicts_bbox, targets_bbox, valid_masks, box_norm, cls_norm)
+         ## -- DFL -- ##
+         loss_dfl = self.dfl(predicts_anc, targets_bbox, valid_masks, box_norm, cls_norm)
+
+         logger.info("Loss IoU: {:.5f}, DFL: {:.5f}, CLS: {:.5f}", loss_iou, loss_dfl, loss_cls)
+         return loss_iou, loss_dfl, loss_cls
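
With the main() entry point removed from the loss module, callers now have to compose the config and drive YOLOLoss themselves. A sketch reproducing what the deleted main() did (it assumes a CUDA device, that the snippet lives somewhere the relative config path below resolves from, and that the predicts.pt/targets.pt tensors the old entry point loaded are present):

import torch
from hydra import compose, initialize

from yolo.utils.loss import YOLOLoss

# Compose the same hydra config the removed @main decorator pointed at;
# hydra resolves config_path relative to the calling module, so adjust
# "yolo/config" to your caller's location.
with initialize(config_path="yolo/config", version_base=None):
    cfg = compose(config_name="config")

loss_fn = YOLOLoss(cfg)                # builds anchors and matcher on CUDA
predicts = torch.load("predicts.pt")   # tensors the deleted main() loaded
targets = torch.load("targets.pt")
loss_iou, loss_dfl, loss_cls = loss_fn(predicts, targets)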