henry000 committed
Commit b44d6bb · Parents: d24904a 1197f7d

🔀 [Merge] branch 'SETUP' into MODEL

LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Kin-Yiu, Wong
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md CHANGED
@@ -20,31 +20,31 @@ If you are interested in contributing, please keep an eye on project updates or
 ## To-Do Lists
 - [ ] Project Setup
     - [X] requirements
-    - [ ] LICENSE
+    - [x] LICENSE
     - [ ] README
-    - [ ] pytests
+    - [x] pytests
     - [ ] setup.py/pip install
-    - [ ] log format
+    - [x] log format
     - [ ] hugging face
 - [ ] Data process
     - [ ] Dataset
-        - [ ] Download script
+        - [x] Download script
         - [ ] Auto Download
         - [ ] xywh, xxyy, xcyc
-    - [ ] Dataloader
-    - [ ] Data augment
+    - [x] Dataloader
+    - [x] Data augment
 - [ ] Model
     - [ ] load model
         - [ ] from yaml
         - [ ] from github
-    - [ ] trainer
-        - [ ] train_one_iter
-        - [ ] train_one_epoch
-    - [ ] DDP, EMA, OTA
+    - [x] trainer
+        - [x] train_one_iter
+        - [x] train_one_epoch
+    - [ ] DDP
+    - [x] EMA, OTA
+    - [ ] Loss
 - [ ] Run
     - [ ] train
     - [ ] test
     - [ ] demo
-- [ ] Configuration
-    - [ ] hyperparams: dataclass
-    - [ ] model cfg: yaml
+- [x] Configuration
examples/example_train.py ADDED
@@ -0,0 +1,35 @@
+import sys
+from pathlib import Path
+
+import hydra
+import torch
+from loguru import logger
+
+project_root = Path(__file__).resolve().parent.parent
+sys.path.append(str(project_root))
+
+from yolo.config.config import Config
+from yolo.model.yolo import get_model
+from yolo.tools.log_helper import custom_logger
+from yolo.tools.trainer import Trainer
+from yolo.utils.dataloader import get_dataloader
+from yolo.utils.get_dataset import prepare_dataset
+
+
+@hydra.main(config_path="../yolo/config", config_name="config", version_base=None)
+def main(cfg: Config):
+    if cfg.download.auto:
+        prepare_dataset(cfg.download)
+
+    dataloader = get_dataloader(cfg)
+    model = get_model(cfg.model)
+    # TODO: get_device or rank, for DDP mode
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    trainer = Trainer(model, cfg.hyper.train, device)
+    trainer.train(dataloader, 10)
+
+
+if __name__ == "__main__":
+    custom_logger()
+    main()
{config β†’ yolo/config}/README.md RENAMED
File without changes
yolo/config/config.py ADDED
@@ -0,0 +1,85 @@
+from dataclasses import dataclass
+from typing import Dict, List, Union
+
+
+@dataclass
+class Model:
+    anchor: List[List[int]]
+    model: Dict[str, List[Dict[str, Union[Dict, List, int]]]]
+
+
+@dataclass
+class DataLoaderConfig:
+    batch_size: int
+    shuffle: bool
+    num_workers: int
+    pin_memory: bool
+
+
+@dataclass
+class OptimizerArgs:
+    lr: float
+    weight_decay: float
+
+
+@dataclass
+class OptimizerConfig:
+    type: str
+    args: OptimizerArgs
+
+
+@dataclass
+class SchedulerArgs:
+    step_size: int
+    gamma: float
+
+
+@dataclass
+class SchedulerConfig:
+    type: str
+    args: SchedulerArgs
+
+
+@dataclass
+class EMAConfig:
+    enabled: bool
+    decay: float
+
+
+@dataclass
+class TrainConfig:
+    optimizer: OptimizerConfig
+    scheduler: SchedulerConfig
+    ema: EMAConfig
+
+
+@dataclass
+class HyperConfig:
+    data: DataLoaderConfig
+    train: TrainConfig
+
+
+@dataclass
+class Dataset:
+    file_name: str
+    file_num: int
+
+
+@dataclass
+class Datasets:
+    base_url: str
+    images: Dict[str, Dataset]
+
+
+@dataclass
+class Download:
+    auto: bool
+    save_path: str
+    datasets: Datasets
+
+
+@dataclass
+class Config:
+    model: Model
+    download: Download
+    hyper: HyperConfig
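
These dataclasses mirror the YAML tree that Hydra composes at startup. A minimal sketch of the pairing, using OmegaConf's structured configs to type-check one fragment (the explicit merge below is illustrative only; the commit itself just type-hints `cfg: Config` in `@hydra.main`):

```python
from dataclasses import dataclass

from omegaconf import OmegaConf


@dataclass
class OptimizerArgs:
    lr: float
    weight_decay: float


@dataclass
class OptimizerConfig:
    type: str
    args: OptimizerArgs


# Same shape as the optimizer block in hyper/default.yaml
yaml_cfg = OmegaConf.create({"type": "Adam", "args": {"lr": 0.001, "weight_decay": 0.0001}})

# Merging against the structured schema validates field names and value types
cfg = OmegaConf.merge(OmegaConf.structured(OptimizerConfig), yaml_cfg)
assert cfg.args.lr == 0.001
```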
yolo/config/config.yaml ADDED
@@ -0,0 +1,11 @@
+hydra:
+  run:
+    dir: ./runs
+
+defaults:
+  - data: coco
+  - download: ../data/download
+  - augmentation: ../data/augmentation
+  - model: v7-base
+  - hyper: default
+  - _self_
yolo/config/data/augmentation.yaml ADDED
@@ -0,0 +1,3 @@
+Mosaic: 1
+# MixUp: 1
+HorizontalFlip: 0.5
{config β†’ yolo/config}/data/coco.yaml RENAMED
File without changes
yolo/config/data/download.yaml ADDED
@@ -0,0 +1,21 @@
+auto: True
+save_path: data/coco
+datasets:
+  images:
+    base_url: http://images.cocodataset.org/zips/
+    train2017:
+      file_name: train2017
+      file_num: 118287
+    val2017:
+      file_name: val2017
+      file_num: 5000
+    test2017:
+      file_name: test2017
+      file_num: 40670
+  annotations:
+    base_url: http://images.cocodataset.org/annotations/
+    annotations:
+      file_name: annotations_trainval2017
+hydra:
+  run:
+    dir: ./runs
yolo/config/hyper/default.yaml ADDED
@@ -0,0 +1,19 @@
+data:
+  batch_size: 4
+  shuffle: True
+  num_workers: 4
+  pin_memory: True
+train:
+  optimizer:
+    type: Adam
+    args:
+      lr: 0.001
+      weight_decay: 0.0001
+  scheduler:
+    type: StepLR
+    args:
+      step_size: 10
+      gamma: 0.1
+  ema:
+    enabled: true
+    decay: 0.995
{config β†’ yolo/config}/model/v7-base.yaml RENAMED
File without changes
{model β†’ yolo/model}/README.md RENAMED
File without changes
{model β†’ yolo/model}/module.py RENAMED
File without changes
{model β†’ yolo/model}/yolo.py RENAMED
@@ -5,7 +5,7 @@ import torch.nn as nn
 from loguru import logger
 from omegaconf import OmegaConf
 
-from tools.layer_helper import get_layer_map
+from yolo.tools.layer_helper import get_layer_map
 
 
 class YOLO(nn.Module):
yolo/tools/__init__.py ADDED
File without changes
yolo/tools/dataset_helper.py ADDED
@@ -0,0 +1,103 @@
+import json
+import os
+from itertools import chain
+from os import path
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
+
+
+def find_labels_path(dataset_path: str, phase_name: str):
+    """
+    Find the path to label files for a specified dataset and phase (e.g., training).
+
+    Args:
+        dataset_path (str): The path to the root directory of the dataset.
+        phase_name (str): The name of the phase for which labels are being searched (e.g., "train", "val", "test").
+
+    Returns:
+        Tuple[str, str]: A tuple containing the path to the labels file and the file format ("json" or "txt").
+    """
+    json_labels_path = path.join(dataset_path, "annotations", f"instances_{phase_name}.json")
+
+    txt_labels_path = path.join(dataset_path, "label", phase_name)
+
+    if path.isfile(json_labels_path):
+        return json_labels_path, "json"
+
+    elif path.isdir(txt_labels_path):
+        txt_files = [f for f in os.listdir(txt_labels_path) if f.endswith(".txt")]
+        if txt_files:
+            return txt_labels_path, "txt"
+
+    raise FileNotFoundError("No labels found in the specified dataset path and phase name.")
+
+
+def create_image_info_dict(labels_path: str) -> Tuple[Dict[str, List], Dict[str, Dict]]:
+    """
+    Create a dictionary containing image information and annotations indexed by image ID.
+
+    Args:
+        labels_path (str): The path to the annotation JSON file.
+
+    Returns:
+        - annotations_index: A dictionary where keys are image IDs and values are lists of annotations.
+        - image_info_dict: A dictionary where keys are image file names without extension and values are image information dictionaries.
+    """
+    with open(labels_path, "r") as file:
+        labels_data = json.load(file)
+    annotations_index = index_annotations_by_image(labels_data)  # TODO: is "lookup" a good name?
+    image_info_dict = {path.splitext(img["file_name"])[0]: img for img in labels_data["images"]}
+    return annotations_index, image_info_dict
+
+
+def index_annotations_by_image(data: Dict[str, Any]):
+    """
+    Build a lookup from image ID to all of that image's annotations.
+
+    Args:
+        data (Dict[str, Any]): A dictionary containing annotation data.
+
+    Returns:
+        Dict[int, List[Dict[str, Any]]]: A dictionary where keys are image IDs and values are lists of annotations.
+        Annotations with "iscrowd" set to True are excluded from the index.
+    """
+    annotation_lookup = {}
+    for anno in data["annotations"]:
+        if anno["iscrowd"]:
+            continue
+        image_id = anno["image_id"]
+        if image_id not in annotation_lookup:
+            annotation_lookup[image_id] = []
+        annotation_lookup[image_id].append(anno)
+    return annotation_lookup
+
+
+def get_scaled_segmentation(
+    annotations: List[Dict[str, Any]], image_dimensions: Dict[str, int]
+) -> Optional[List[List[float]]]:
+    """
+    Scale the segmentation data based on image dimensions and return a list of scaled segmentation data.
+
+    Args:
+        annotations (List[Dict[str, Any]]): A list of annotation dictionaries.
+        image_dimensions (Dict[str, int]): A dictionary containing image dimensions (height and width).
+
+    Returns:
+        Optional[List[List[float]]]: A list of scaled segmentation data, where each sublist contains category_id followed by scaled (x, y) coordinates.
+    """
+    if annotations is None:
+        return None
+
+    seg_array_with_cat = []
+    h, w = image_dimensions["height"], image_dimensions["width"]
+    for anno in annotations:
+        category_id = anno["category_id"]
+        seg_list = [item for sublist in anno["segmentation"] for item in sublist]
+        # Group the flat list into (x, y) pairs and scale by image width and height
+        scaled_seg_data = (np.array(seg_list).reshape(-1, 2) / [w, h]).tolist()
+        # Prepend the category id, then flatten the scaled pairs back out
+        scaled_flat_seg_data = [category_id] + list(chain(*scaled_seg_data))
+        seg_array_with_cat.append(scaled_flat_seg_data)
+
+    return seg_array_with_cat
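
To make `get_scaled_segmentation`'s math concrete, here is a tiny self-contained run of its core transform (the numbers are made up for illustration):

```python
import numpy as np

# One polygon annotation on a 640x480 image
anno = {"category_id": 3, "segmentation": [[320.0, 240.0, 64.0, 48.0]]}
h, w = 480, 640

# Flatten, group into (x, y) pairs, scale by (width, height)
seg_list = [v for poly in anno["segmentation"] for v in poly]
scaled = (np.array(seg_list).reshape(-1, 2) / [w, h]).tolist()

# Prepend the category id, then flatten back out
flat = [anno["category_id"]] + [v for xy in scaled for v in xy]
print(flat)  # [3, 0.5, 0.5, 0.1, 0.1]
```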
{tools β†’ yolo/tools}/layer_helper.py RENAMED
@@ -2,7 +2,7 @@ import inspect
 
 import torch.nn as nn
 
-from model import module
+from yolo.model import module
 
 
 def auto_pad():
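
`get_layer_map` itself is outside this hunk. A plausible sketch, assuming it uses the `inspect` import above to collect the `nn.Module` subclasses defined in `yolo/model/module.py` (a hypothetical reconstruction, not the committed body):

```python
import inspect

import torch.nn as nn


def get_layer_map(module) -> dict:
    # Map class name -> class for every nn.Module subclass defined in `module`,
    # so model YAML entries can be resolved to layer constructors by name.
    return {
        name: cls
        for name, cls in inspect.getmembers(module, inspect.isclass)
        if issubclass(cls, nn.Module)
    }
```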
{tools β†’ yolo/tools}/log_helper.py RENAMED
File without changes
{tools β†’ yolo/tools}/model_helper.py RENAMED
@@ -4,7 +4,7 @@ import torch
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
 
-from config.config import OptimizerConfig, SchedulerConfig
+from yolo.config.config import OptimizerConfig, SchedulerConfig
 
 
 class EMA:
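
The bodies of `get_optimizer` and `get_scheduler` are also outside this hunk. Given the `type`/`args` split in `hyper/default.yaml`, a reasonable sketch (an assumption, not the committed implementation) resolves the name against `torch.optim`:

```python
import torch
from torch import nn
from torch.optim import Optimizer


def get_optimizer(model: nn.Module, cfg) -> Optimizer:
    # Assumes cfg.type names a torch.optim class ("Adam", "SGD", ...)
    # and cfg.args is a dict-like mapping of its keyword arguments.
    optimizer_class = getattr(torch.optim, cfg.type)
    return optimizer_class(model.parameters(), **cfg.args)
```

With `hyper/default.yaml` this would yield `torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)`.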
{tools β†’ yolo/tools}/trainer.py RENAMED
@@ -2,10 +2,10 @@ import torch
 from loguru import logger
 from tqdm import tqdm
 
-from config.config import TrainConfig
-from model.yolo import YOLO
-from tools.model_helper import EMA, get_optimizer, get_scheduler
-from utils.loss import get_loss_function
+from yolo.config.config import TrainConfig
+from yolo.model.yolo import YOLO
+from yolo.tools.model_helper import EMA, get_optimizer, get_scheduler
+from yolo.utils.loss import get_loss_function
 
 
 class Trainer:
{utils β†’ yolo/utils}/README.md RENAMED
File without changes
yolo/utils/converter_json2txt.py ADDED
@@ -0,0 +1,91 @@
+import json
+import os
+from typing import Dict, List, Optional
+
+from tqdm import tqdm
+
+
+def discretize_categories(categories: List[Dict[str, int]]) -> Dict[int, int]:
+    """
+    Maps each unique 'id' in the list of category dictionaries to a sequential integer index.
+    Indices are assigned based on the sorted 'id' values.
+    """
+    sorted_categories = sorted(categories, key=lambda category: category["id"])
+    return {category["id"]: index for index, category in enumerate(sorted_categories)}
+
+
+def process_annotations(
+    image_annotations: Dict[int, List[Dict]],
+    image_info_dict: Dict[int, tuple],
+    output_dir: str,
+    id_to_idx: Optional[Dict[int, int]] = None,
+) -> None:
+    """
+    Process and save annotations to files, with the option to remap category IDs.
+    """
+    for image_id, annotations in tqdm(image_annotations.items(), desc="Processing annotations"):
+        file_path = os.path.join(output_dir, f"{image_id:0>12}.txt")
+        if not annotations:
+            continue
+        with open(file_path, "w") as file:
+            for annotation in annotations:
+                process_annotation(annotation, image_info_dict[image_id], id_to_idx, file)
+
+
+def process_annotation(annotation: Dict, image_dims: tuple, id_to_idx: Optional[Dict[int, int]], file) -> None:
+    """
+    Convert a single annotation's segmentation and write it to the open file handle.
+    """
+    category_id = annotation["category_id"]
+    segmentation = (
+        annotation["segmentation"][0]
+        if annotation["segmentation"] and isinstance(annotation["segmentation"][0], list)
+        else None
+    )
+
+    if segmentation is None:
+        return
+
+    img_width, img_height = image_dims
+    normalized_segmentation = normalize_segmentation(segmentation, img_width, img_height)
+
+    if id_to_idx:
+        category_id = id_to_idx.get(category_id, category_id)
+
+    file.write(f"{category_id} {' '.join(normalized_segmentation)}\n")
+
+
+def normalize_segmentation(segmentation: List[float], img_width: int, img_height: int) -> List[str]:
+    """
+    Normalize and format segmentation coordinates.
+    """
+    normalized = [
+        f"{coord / img_width:.6f}" if index % 2 == 0 else f"{coord / img_height:.6f}"
+        for index, coord in enumerate(segmentation)
+    ]
+    return normalized
+
+
+def convert_annotations(json_file: str, output_dir: str) -> None:
+    """
+    Load annotation data from a JSON file and process all annotations.
+    """
+    with open(json_file) as file:
+        data = json.load(file)
+
+    os.makedirs(output_dir, exist_ok=True)
+
+    image_info_dict = {img["id"]: (img["width"], img["height"]) for img in data.get("images", [])}
+    id_to_idx = discretize_categories(data.get("categories", [])) if "categories" in data else None
+    image_annotations = {img_id: [] for img_id in image_info_dict}
+
+    for annotation in data.get("annotations", []):
+        if not annotation.get("iscrowd", False):
+            image_annotations[annotation["image_id"]].append(annotation)
+
+    process_annotations(image_annotations, image_info_dict, output_dir, id_to_idx)
+
+
+if __name__ == "__main__":
+    convert_annotations("./data/coco/annotations/instances_train2017.json", "./data/coco/labels/train2017/")
+    convert_annotations("./data/coco/annotations/instances_val2017.json", "./data/coco/labels/val2017/")
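
`discretize_categories` matters because COCO category ids are sparse (1 to 90 with gaps) while training usually wants dense indices; a tiny worked example of the mapping:

```python
# Illustrative subset of COCO categories; the ids are sparse
categories = [{"id": 90}, {"id": 1}, {"id": 3}]

sorted_categories = sorted(categories, key=lambda category: category["id"])
id_to_idx = {category["id"]: index for index, category in enumerate(sorted_categories)}
print(id_to_idx)  # {1: 0, 3: 1, 90: 2}
```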
yolo/utils/data_augment.py ADDED
@@ -0,0 +1,125 @@
+import numpy as np
+import torch
+from PIL import Image
+from torchvision.transforms import functional as TF
+
+
+class Compose:
+    """Composes several transforms together."""
+
+    def __init__(self, transforms, image_size: int = 640):
+        self.transforms = transforms
+        self.image_size = image_size
+
+        for transform in self.transforms:
+            if hasattr(transform, "set_parent"):
+                transform.set_parent(self)
+
+    def __call__(self, image, boxes):
+        for transform in self.transforms:
+            image, boxes = transform(image, boxes)
+        return image, boxes
+
+
+class HorizontalFlip:
+    """Randomly horizontally flips the image along with the bounding boxes."""
+
+    def __init__(self, prob=0.5):
+        self.prob = prob
+
+    def __call__(self, image, boxes):
+        if torch.rand(1) < self.prob:
+            image = TF.hflip(image)
+            boxes[:, [1, 3]] = 1 - boxes[:, [3, 1]]
+        return image, boxes
+
+
+class VerticalFlip:
+    """Randomly vertically flips the image along with the bounding boxes."""
+
+    def __init__(self, prob=0.5):
+        self.prob = prob
+
+    def __call__(self, image, boxes):
+        if torch.rand(1) < self.prob:
+            image = TF.vflip(image)
+            boxes[:, [2, 4]] = 1 - boxes[:, [4, 2]]
+        return image, boxes
+
+
+class Mosaic:
+    """Applies the Mosaic augmentation to a batch of images and their corresponding boxes."""
+
+    def __init__(self, prob=0.5):
+        self.prob = prob
+        self.parent = None
+
+    def set_parent(self, parent):
+        self.parent = parent
+
+    def __call__(self, image, boxes):
+        if torch.rand(1) >= self.prob:
+            return image, boxes
+
+        assert self.parent is not None, "Parent is not set. Mosaic cannot retrieve image size."
+
+        img_sz = self.parent.image_size  # image_size is defined on the parent Compose
+        more_data = self.parent.get_more_data(3)  # get 3 more images randomly
+
+        data = [(image, boxes)] + more_data
+        mosaic_image = Image.new("RGB", (2 * img_sz, 2 * img_sz))
+        vectors = np.array([(-1, -1), (0, -1), (-1, 0), (0, 0)])
+        center = np.array([img_sz, img_sz])
+        all_labels = []
+
+        for (image, boxes), vector in zip(data, vectors):
+            this_w, this_h = image.size
+            coord = tuple(center + vector * np.array([this_w, this_h]))
+
+            mosaic_image.paste(image, coord)
+            xmin, ymin, xmax, ymax = boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]
+            xmin = (xmin * this_w + coord[0]) / (2 * img_sz)
+            xmax = (xmax * this_w + coord[0]) / (2 * img_sz)
+            ymin = (ymin * this_h + coord[1]) / (2 * img_sz)
+            ymax = (ymax * this_h + coord[1]) / (2 * img_sz)
+
+            adjusted_boxes = torch.stack([boxes[:, 0], xmin, ymin, xmax, ymax], dim=1)
+            all_labels.append(adjusted_boxes)
+
+        all_labels = torch.cat(all_labels, dim=0)
+        mosaic_image = mosaic_image.resize((img_sz, img_sz))
+        return mosaic_image, all_labels
+
+
+class MixUp:
+    """Applies the MixUp augmentation to a pair of images and their corresponding boxes."""
+
+    def __init__(self, prob=0.5, alpha=1.0):
+        self.alpha = alpha
+        self.prob = prob
+        self.parent = None
+
+    def set_parent(self, parent):
+        """Set the parent dataset object for accessing dataset methods."""
+        self.parent = parent
+
+    def __call__(self, image, boxes):
+        if torch.rand(1) >= self.prob:
+            return image, boxes
+
+        assert self.parent is not None, "Parent is not set. MixUp cannot retrieve additional data."
+
+        # Retrieve another image and its boxes randomly from the dataset
+        image2, boxes2 = self.parent.get_more_data()[0]
+
+        # Calculate the mixup lambda parameter
+        lam = np.random.beta(self.alpha, self.alpha) if self.alpha > 0 else 0.5
+
+        # Mix images
+        image1, image2 = TF.to_tensor(image), TF.to_tensor(image2)
+        mixed_image = lam * image1 + (1 - lam) * image2
+
+        # Mix bounding boxes
+        mixed_boxes = torch.cat([lam * boxes, (1 - lam) * boxes2])
+
+        return TF.to_pil_image(mixed_image), mixed_boxes
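
Since `Mosaic` and `MixUp` pull extra samples through their parent `Compose`, the wiring matters; a minimal usage sketch in which the `get_more_data` stub stands in for `YoloDataset.get_more_data`:

```python
import torch
from PIL import Image

from yolo.utils.data_augment import Compose, HorizontalFlip, Mosaic

transform = Compose([HorizontalFlip(prob=0.5), Mosaic(prob=1.0)], image_size=640)

# Stand-in for YoloDataset.get_more_data: returns (image, boxes) pairs
transform.get_more_data = lambda num=1: [
    (Image.new("RGB", (640, 640)), torch.tensor([[0.0, 0.1, 0.1, 0.5, 0.5]]))
    for _ in range(num)
]

image = Image.new("RGB", (640, 640))
boxes = torch.tensor([[1.0, 0.2, 0.2, 0.8, 0.8]])  # [class, xmin, ymin, xmax, ymax]
image, boxes = transform(image, boxes)  # mosaic of 4 tiles, boxes re-normalized
```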
yolo/utils/dataloader.py ADDED
@@ -0,0 +1,207 @@
+import os
+from os import path
+from typing import List, Tuple, Union
+
+import diskcache as dc
+import hydra
+import numpy as np
+import torch
+from loguru import logger
+from PIL import Image
+from torch.utils.data import DataLoader, Dataset
+from torchvision.transforms import functional as TF
+from tqdm.rich import tqdm
+
+from yolo.tools.dataset_helper import (
+    create_image_info_dict,
+    find_labels_path,
+    get_scaled_segmentation,
+)
+from yolo.utils.data_augment import Compose, HorizontalFlip, MixUp, Mosaic, VerticalFlip
+from yolo.utils.drawer import draw_bboxes
+
+
+class YoloDataset(Dataset):
+    def __init__(self, config: dict, phase: str = "train2017", image_size: int = 640):
+        dataset_cfg = config.data
+        augment_cfg = config.augmentation
+        phase_name = dataset_cfg.get(phase, phase)
+        self.image_size = image_size
+
+        # Each augmentation name in the config maps to a class in data_augment
+        transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
+        self.transform = Compose(transforms, self.image_size)
+        self.transform.get_more_data = self.get_more_data
+        self.data = self.load_data(dataset_cfg.path, phase_name)
+
+    def load_data(self, dataset_path, phase_name):
+        """
+        Loads data from a cache or generates a new cache for a specific dataset phase.
+
+        Parameters:
+            dataset_path (str): The root path to the dataset directory.
+            phase_name (str): The specific phase of the dataset (e.g., 'train', 'test') to load or generate data for.
+
+        Returns:
+            dict: The loaded data from the cache for the specified phase.
+        """
+        cache_path = path.join(dataset_path, ".cache")
+        cache = dc.Cache(cache_path)
+        data = cache.get(phase_name)
+
+        if data is None:
+            logger.info("Generating {} cache", phase_name)
+            data = self.filter_data(dataset_path, phase_name)
+            cache[phase_name] = data
+
+        cache.close()
+        logger.info("📦 Loaded {} cache", phase_name)
+        return data
+
+    def filter_data(self, dataset_path: str, phase_name: str) -> list:
+        """
+        Filters and collects dataset information by pairing images with their corresponding labels.
+
+        Parameters:
+            dataset_path (str): Path to the root directory of the dataset.
+            phase_name (str): The dataset phase whose images and labels should be paired.
+
+        Returns:
+            list: A list of tuples, each containing the path to an image file and its associated segmentation as a tensor.
+        """
+        images_path = path.join(dataset_path, "images", phase_name)
+        labels_path, data_type = find_labels_path(dataset_path, phase_name)
+        images_list = sorted(os.listdir(images_path))
+        if data_type == "json":
+            annotations_index, image_info_dict = create_image_info_dict(labels_path)
+
+        data = []
+        valid_inputs = 0
+        for image_name in tqdm(images_list, desc="Filtering data"):
+            if not image_name.lower().endswith((".jpg", ".jpeg", ".png")):
+                continue
+            image_id, _ = path.splitext(image_name)
+
+            if data_type == "json":
+                image_info = image_info_dict.get(image_id, None)
+                if image_info is None:
+                    continue
+                annotations = annotations_index.get(image_info["id"], [])
+                image_seg_annotations = get_scaled_segmentation(annotations, image_info)
+                if not image_seg_annotations:
+                    continue
+
+            elif data_type == "txt":
+                label_path = path.join(labels_path, f"{image_id}.txt")
+                if not path.isfile(label_path):
+                    continue
+                with open(label_path, "r") as file:
+                    image_seg_annotations = [list(map(float, line.strip().split())) for line in file]
+
+            labels = self.load_valid_labels(image_id, image_seg_annotations)
+            if labels is not None:
+                img_path = path.join(images_path, image_name)
+                data.append((img_path, labels))
+                valid_inputs += 1
+
+        logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
+        return data
+
+    def load_valid_labels(self, label_path, seg_data_one_img) -> Union[torch.Tensor, None]:
+        """
+        Loads and validates that bounding box data lies in [0, 1].
+
+        Parameters:
+            label_path (str): The identifier of the label source, used for warning messages.
+
+        Returns:
+            torch.Tensor or None: A tensor of all valid bounding boxes if any are found; otherwise, None.
+        """
+        bboxes = []
+        for seg_data in seg_data_one_img:
+            cls = seg_data[0]
+            points = np.array(seg_data[1:]).reshape(-1, 2)
+            # Keep only (x, y) pairs where both coordinates fall inside [0, 1]
+            valid_points = points[((points >= 0) & (points <= 1)).all(axis=1)]
+            if valid_points.size > 1:
+                bbox = torch.tensor([cls, *valid_points.min(axis=0), *valid_points.max(axis=0)])
+                bboxes.append(bbox)
+
+        if bboxes:
+            return torch.stack(bboxes)
+        else:
+            logger.warning("No valid BBox in {}", label_path)
+            return None
+
+    def get_data(self, idx):
+        img_path, bboxes = self.data[idx]
+        img = Image.open(img_path).convert("RGB")
+        return img, bboxes
+
+    def get_more_data(self, num: int = 1):
+        indices = torch.randint(0, len(self), (num,))
+        return [self.get_data(idx) for idx in indices]
+
+    def __getitem__(self, idx) -> Tuple[torch.Tensor, torch.Tensor]:
+        img, bboxes = self.get_data(idx)
+        if self.transform:
+            img, bboxes = self.transform(img, bboxes)
+        img = TF.to_tensor(img)
+        return img, bboxes
+
+    def __len__(self) -> int:
+        return len(self.data)
+
+
+class YoloDataLoader(DataLoader):
+    def __init__(self, config: dict):
+        """Initializes the YoloDataLoader with hydra-config files."""
+        hyper = config.hyper.data
+        dataset = YoloDataset(config)
+
+        super().__init__(
+            dataset,
+            batch_size=hyper.batch_size,
+            shuffle=hyper.shuffle,
+            num_workers=hyper.num_workers,
+            pin_memory=hyper.pin_memory,
+            collate_fn=self.collate_fn,
+        )
+
+    def collate_fn(self, batch: List[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, List[torch.Tensor]]:
+        """
+        A collate function to handle batching of images and their corresponding targets.
+
+        Args:
+            batch (list of tuples): Each tuple contains:
+                - image (torch.Tensor): The image tensor.
+                - labels (torch.Tensor): The tensor of labels for the image.
+
+        Returns:
+            Tuple[torch.Tensor, List[torch.Tensor]]: A tuple containing:
+                - A tensor of batched images.
+                - A list of tensors, each corresponding to bboxes for each image in the batch.
+        """
+        images = torch.stack([item[0] for item in batch])
+        targets = [item[1] for item in batch]
+        return images, targets
+
+
+def get_dataloader(config):
+    return YoloDataLoader(config)
+
+
+@hydra.main(config_path="../config", config_name="config", version_base=None)
+def main(cfg):
+    dataloader = get_dataloader(cfg)
+    draw_bboxes(*next(iter(dataloader)))
+
+
+if __name__ == "__main__":
+    import sys
+
+    sys.path.append("./")
+    from yolo.tools.log_helper import custom_logger
+
+    custom_logger()
+    main()
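
Because box counts differ per image, `collate_fn` keeps the targets as a list rather than stacking them; a shape-level sketch with dummy tensors:

```python
import torch

# Two dummy samples: 3x640x640 images with 2 and 5 boxes respectively
batch = [
    (torch.zeros(3, 640, 640), torch.zeros(2, 5)),
    (torch.zeros(3, 640, 640), torch.zeros(5, 5)),
]

images = torch.stack([item[0] for item in batch])  # shape [2, 3, 640, 640]
targets = [item[1] for item in batch]              # list of [N_i, 5] tensors
print(images.shape, [t.shape for t in targets])
```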
yolo/utils/drawer.py ADDED
@@ -0,0 +1,41 @@
+from typing import List, Union
+
+import torch
+from loguru import logger
+from PIL import Image, ImageDraw, ImageFont
+from torchvision.transforms.functional import to_pil_image
+
+
+def draw_bboxes(img: Union[Image.Image, torch.Tensor], bboxes: List[List[Union[int, float]]]):
+    """
+    Draw bounding boxes on an image.
+
+    Args:
+    - img (PIL Image or torch.Tensor): Image on which to draw the bounding boxes.
+    - bboxes (List of Lists/Tensors): Bounding boxes with [class_id, x_min, y_min, x_max, y_max],
+      where coordinates are normalized [0, 1].
+    """
+    # Convert tensor image to PIL Image if necessary
+    if isinstance(img, torch.Tensor):
+        if img.dim() > 3:
+            logger.info("Multi-frame tensor detected, using the first image.")
+            img = img[0]
+            bboxes = bboxes[0]
+        img = to_pil_image(img)
+
+    draw = ImageDraw.Draw(img)
+    width, height = img.size
+    font = ImageFont.load_default(30)
+
+    for bbox in bboxes:
+        class_id, x_min, y_min, x_max, y_max = bbox
+        x_min = x_min * width
+        x_max = x_max * width
+        y_min = y_min * height
+        y_max = y_max * height
+        shape = [(x_min, y_min), (x_max, y_max)]
+        draw.rectangle(shape, outline="red", width=3)
+        draw.text((x_min, y_min), str(int(class_id)), font=font, fill="blue")
+
+    img.save("visualize.jpg")  # Save the image with annotations
+    logger.info("Saved visualized image at visualize.jpg")
yolo/utils/get_dataset.py ADDED
@@ -0,0 +1,86 @@
+import os
+import zipfile
+
+import requests
+from hydra import main
+from loguru import logger
+from tqdm import tqdm
+
+
+def download_file(url, destination):
+    """
+    Downloads a file from the specified URL to the destination path with progress logging.
+    """
+    logger.info(f"Downloading {os.path.basename(destination)}...")
+    with requests.get(url, stream=True) as response:
+        response.raise_for_status()
+        total_size = int(response.headers.get("content-length", 0))
+        progress = tqdm(total=total_size, unit="iB", unit_scale=True, desc=os.path.basename(destination), leave=True)
+
+        with open(destination, "wb") as file:
+            for data in response.iter_content(chunk_size=1024 * 1024):  # 1 MB chunks
+                file.write(data)
+                progress.update(len(data))
+        progress.close()
+    logger.info("Download completed.")
+
+
+def unzip_file(source, destination):
+    """
+    Extracts a ZIP file to the specified directory and removes the ZIP file after extraction.
+    """
+    logger.info(f"Unzipping {os.path.basename(source)}...")
+    with zipfile.ZipFile(source, "r") as zip_ref:
+        zip_ref.extractall(destination)
+    os.remove(source)
+    logger.info(f"Removed {source}.")
+
+
+def check_files(directory, expected_count=None):
+    """
+    Returns True if the number of files in the directory matches expected_count, False otherwise.
+    """
+    if not os.path.isdir(directory):
+        return False
+    files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
+    return len(files) == expected_count if expected_count is not None else bool(files)
+
+
+@main(config_path="../config/data", config_name="download", version_base=None)
+def prepare_dataset(cfg):
+    """
+    Prepares the dataset by downloading and unzipping it if necessary.
+    """
+    data_dir = cfg.save_path
+    for data_type, settings in cfg.datasets.items():
+        base_url = settings["base_url"]
+        for dataset_type, dataset_args in settings.items():
+            if dataset_type == "base_url":
+                continue  # Skip the base_url entry
+            file_name = f"{dataset_args.get('file_name', dataset_type)}.zip"
+            url = f"{base_url}{file_name}"
+            local_zip_path = os.path.join(data_dir, file_name)
+            extract_to = os.path.join(data_dir, data_type) if data_type != "annotations" else data_dir
+            final_place = os.path.join(extract_to, dataset_type)
+
+            os.makedirs(extract_to, exist_ok=True)
+            if check_files(final_place, dataset_args.get("file_num")):
+                logger.info(f"Dataset {dataset_type} already verified.")
+                continue
+
+            if not os.path.exists(local_zip_path):
+                download_file(url, local_zip_path)
+            unzip_file(local_zip_path, extract_to)
+
+            if not check_files(final_place, dataset_args.get("file_num")):
+                logger.error(f"Error verifying the {dataset_type} dataset after extraction.")
+
+
+if __name__ == "__main__":
+    import sys
+
+    sys.path.append("./")
+    from yolo.tools.log_helper import custom_logger
+
+    custom_logger()
+    prepare_dataset()
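
Each dataset entry in `download.yaml` expands to one zip URL and one extraction target; tracing the same string logic for the train split (paths assume the default `save_path: data/coco`):

```python
import os

base_url = "http://images.cocodataset.org/zips/"
file_name = "train2017.zip"

url = f"{base_url}{file_name}"                         # .../zips/train2017.zip
local_zip_path = os.path.join("data/coco", file_name)  # data/coco/train2017.zip
extract_to = os.path.join("data/coco", "images")       # data/coco/images
print(url, local_zip_path, extract_to)
```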
yolo/utils/loss.py ADDED
@@ -0,0 +1,2 @@
+def get_loss_function(*args, **kwargs):
+    raise NotImplementedError