update demo
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- LICENSE +20 -0
- README.md +6 -5
- app.py +79 -0
- configs/_base_/datasets/parking_instance.py +48 -0
- configs/_base_/datasets/parking_instance_coco.py +49 -0
- configs/_base_/datasets/people_real_coco.py +49 -0
- configs/_base_/datasets/walt_people.py +49 -0
- configs/_base_/datasets/walt_vehicle.py +49 -0
- configs/_base_/default_runtime.py +16 -0
- configs/_base_/models/mask_rcnn_swin_fpn.py +127 -0
- configs/_base_/models/occ_mask_rcnn_swin_fpn.py +127 -0
- configs/_base_/schedules/schedule_1x.py +11 -0
- configs/walt/walt_people.py +80 -0
- configs/walt/walt_vehicle.py +80 -0
- cwalt/CWALT.py +161 -0
- cwalt/Clip_WALT_Generate.py +284 -0
- cwalt/Download_Detections.py +28 -0
- cwalt/clustering_utils.py +132 -0
- cwalt/kmedoid.py +55 -0
- cwalt/utils.py +168 -0
- cwalt_generate.py +14 -0
- docker/Dockerfile +52 -0
- github_vis/cwalt.gif +0 -0
- github_vis/vis_cars.gif +0 -0
- github_vis/vis_people.gif +0 -0
- infer.py +118 -0
- mmcv_custom/__init__.py +5 -0
- mmcv_custom/checkpoint.py +500 -0
- mmcv_custom/runner/__init__.py +8 -0
- mmcv_custom/runner/checkpoint.py +85 -0
- mmcv_custom/runner/epoch_based_runner.py +104 -0
- mmdet/__init__.py +28 -0
- mmdet/apis/__init__.py +10 -0
- mmdet/apis/inference.py +217 -0
- mmdet/apis/test.py +189 -0
- mmdet/apis/train.py +185 -0
- mmdet/core/__init__.py +7 -0
- mmdet/core/anchor/__init__.py +11 -0
- mmdet/core/anchor/anchor_generator.py +727 -0
- mmdet/core/anchor/builder.py +7 -0
- mmdet/core/anchor/point_generator.py +37 -0
- mmdet/core/anchor/utils.py +71 -0
- mmdet/core/bbox/__init__.py +27 -0
- mmdet/core/bbox/assigners/__init__.py +16 -0
- mmdet/core/bbox/assigners/approx_max_iou_assigner.py +145 -0
- mmdet/core/bbox/assigners/assign_result.py +204 -0
- mmdet/core/bbox/assigners/atss_assigner.py +178 -0
- mmdet/core/bbox/assigners/base_assigner.py +9 -0
- mmdet/core/bbox/assigners/center_region_assigner.py +335 -0
- mmdet/core/bbox/assigners/grid_assigner.py +155 -0
LICENSE
ADDED
@@ -0,0 +1,20 @@
Copyright (c) 2022-2022 dinesh reddy and others

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README.md
CHANGED
@@ -1,12 +1,13 @@
 ---
-title: WALT
-emoji:
-colorFrom:
-colorTo:
+title: WALT DEMO
+emoji: ⚡
+colorFrom: indigo
+colorTo: indigo
 sdk: gradio
-sdk_version: 3.0.
+sdk_version: 3.0.20
 app_file: app.py
 pinned: false
+license: mit
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,79 @@
import numpy as np
import torch
import gradio as gr
from infer import detections
'''
import os
os.system("mkdir data")
os.system("mkdir data/models")
os.system("wget https://www.cs.cmu.edu/~walt/models/walt_people.pth -O data/models/walt_people.pth")
os.system("wget https://www.cs.cmu.edu/~walt/models/walt_vehicle.pth -O data/models/walt_vehicle.pth")
'''
def walt_demo(input_img, confidence_threshold):
    #detect_people = detections('configs/walt/walt_people.py', 'cuda:0', model_path='data/models/walt_people.pth')
    # run on GPU when available, otherwise fall back to CPU
    if torch.cuda.is_available() == False:
        device = 'cpu'
    else:
        device = 'cuda:0'
    #detect_people = detections('configs/walt/walt_people.py', device, model_path='data/models/walt_people.pth')
    # build the vehicle detector from the WALT config and checkpoint
    detect = detections('configs/walt/walt_vehicle.py', device, model_path='data/models/walt_vehicle.pth', threshold=confidence_threshold)

    count = 0
    #img = detect_people.run_on_image(input_img)
    output_img = detect.run_on_image(input_img)
    #try:
    #except:
    #    print("detecting on image failed")

    return output_img

description = """
WALT Demo on WALT dataset. After watching and automatically learning for several days, this approach shows significant performance improvement in detecting and segmenting occluded people and vehicles, over human-supervised amodal approaches.
<center>
<a href="https://www.cs.cmu.edu/~walt/">
<img style="display:inline" alt="Project page" src="https://img.shields.io/badge/Project%20Page-WALT-green">
</a>
<a href="https://www.cs.cmu.edu/~walt/pdf/walt.pdf"><img style="display:inline" src="https://img.shields.io/badge/Paper-Pdf-red"></a>
<a href="https://github.com/dineshreddy91/WALT"><img style="display:inline" src="https://img.shields.io/github/stars/dineshreddy91/WALT?style=social"></a>
</center>
"""
title = "WALT: Watch And Learn 2D Amodal Representation using Time-lapse Imagery"
article = """
<center>
<img src='https://visitor-badge.glitch.me/badge?page_id=anhquancao.MonoScene&left_color=darkmagenta&right_color=purple' alt='visitor badge'>
</center>
"""

examples = [
    ['demo/images/img_1.jpg', 0.8],
    ['demo/images/img_2.jpg', 0.8],
    ['demo/images/img_4.png', 0.85],
]

'''
import cv2
filename='demo/images/img_1.jpg'
img=cv2.imread(filename)
img=walt_demo(img)
cv2.imwrite(filename.replace('/images/','/results/'),img)
cv2.imwrite('check.png',img)
'''
# Gradio UI: an input image and a confidence slider in, rendered amodal detections out
confidence_threshold = gr.Slider(minimum=0.3,
                                 maximum=1.0,
                                 step=0.01,
                                 value=1.0,
                                 label="Amodal Detection Confidence Threshold")
inputs = [gr.Image(), confidence_threshold]
demo = gr.Interface(walt_demo,
                    outputs="image",
                    inputs=inputs,
                    article=article,
                    title=title,
                    enable_queue=True,
                    examples=examples,
                    description=description)

#demo.launch(server_name="0.0.0.0", server_port=7000)
demo.launch()
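Note that the checkpoint-download block at the top of app.py is commented out, so the weights have to be in place before the interface starts. The following is a minimal preparation sketch, not part of this commit; it reuses the same checkpoint URLs and target paths as the commented-out block, with os.makedirs/urllib instead of shelling out to mkdir/wget:

import os
import urllib.request

# create the directory layout app.py expects
os.makedirs('data/models', exist_ok=True)

# same checkpoint URLs as in the commented-out block of app.py
for name in ('walt_people.pth', 'walt_vehicle.pth'):
    url = 'https://www.cs.cmu.edu/~walt/models/' + name
    dst = 'data/models/' + name
    if not os.path.exists(dst):
        urllib.request.urlretrieve(url, dst)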
configs/_base_/datasets/parking_instance.py
ADDED
@@ -0,0 +1,48 @@
dataset_type = 'ParkingDataset'
data_root = 'data/parking/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_bboxes_3d_proj']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=1,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'GT_data/',
        img_prefix=data_root + 'images/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'GT_data/',
        img_prefix=data_root + 'images/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'GT_data/',
        img_prefix=data_root + 'images/',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox'])  # , 'segm'])
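These _base_/datasets files are not used on their own; they are merged into the top-level entry points under configs/walt/ listed above. A small inspection sketch, assuming the MMCV 1.x Config API that this Swin/MMDetection code base is normally paired with:

from mmcv import Config

# load a composed config and look at what the dataset _base_ file contributes
cfg = Config.fromfile('configs/walt/walt_vehicle.py')
print(cfg.data.train.type)           # dataset class registered in mmdet, e.g. 'WaltDataset'
print(cfg.data.samples_per_gpu)      # per-GPU batch size from the dataset _base_
print([t['type'] for t in cfg.data.train.pipeline])  # training pipeline steps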
configs/_base_/datasets/parking_instance_coco.py
ADDED
@@ -0,0 +1,49 @@
dataset_type = 'ParkingCocoDataset'
data_root = 'data/parking/'
data_root_test = 'data/parking_highres/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=6,
    workers_per_gpu=6,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'GT_data/',
        img_prefix=data_root + 'images/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root_test + 'GT_data/',
        img_prefix=data_root_test + 'images',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root_test + 'GT_data/',
        img_prefix=data_root_test + 'images',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
configs/_base_/datasets/people_real_coco.py
ADDED
@@ -0,0 +1,49 @@
dataset_type = 'WaltDataset'
data_root = 'data/cwalt_train/'
data_root_test = 'data/cwalt_test/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        ann_file=data_root + '/',
        img_prefix=data_root + '/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root_test + '/',
        img_prefix=data_root_test + '/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root_test + '/',
        img_prefix=data_root_test + '/',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
configs/_base_/datasets/walt_people.py
ADDED
@@ -0,0 +1,49 @@
dataset_type = 'WaltDataset'
data_root = 'data/cwalt_train/'
data_root_test = 'data/cwalt_test/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        ann_file=data_root + '/',
        img_prefix=data_root + '/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root_test + '/',
        img_prefix=data_root_test + '/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root_test + '/',
        img_prefix=data_root_test + '/',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
configs/_base_/datasets/walt_vehicle.py
ADDED
@@ -0,0 +1,49 @@
dataset_type = 'WaltDataset'
data_root = 'data/cwalt_train/'
data_root_test = 'data/cwalt_test/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=5,
    workers_per_gpu=5,
    train=dict(
        type=dataset_type,
        ann_file=data_root + '/',
        img_prefix=data_root + '/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root_test + '/',
        img_prefix=data_root_test + '/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root_test + '/',
        img_prefix=data_root_test + '/',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
configs/_base_/default_runtime.py
ADDED
@@ -0,0 +1,16 @@
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
custom_hooks = [dict(type='NumClassCheckHook')]

dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
configs/_base_/models/mask_rcnn_swin_fpn.py
ADDED
@@ -0,0 +1,127 @@
# model settings
model = dict(
    type='MaskRCNN',
    pretrained=None,
    backbone=dict(
        type='SwinTransformer',
        embed_dim=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4.,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        ape=False,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        use_checkpoint=False),
    neck=dict(
        type='FPN',
        in_channels=[96, 192, 384, 768],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
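For orientation, the train_cfg/test_cfg keys above live inside the model dict, which is the MMDetection 2.x convention. A short sketch of instantiating the detector from one of the composed configs, assuming the standard mmdet 2.x build_detector API is available in this repo's vendored code:

from mmcv import Config
from mmdet.models import build_detector

# the base model file is pulled in through a configs/walt/*.py entry point
cfg = Config.fromfile('configs/walt/walt_vehicle.py')

# MMDetection 2.x style: train_cfg/test_cfg are part of cfg.model
model = build_detector(cfg.model)
print(type(model).__name__)               # MaskRCNN
print(cfg.model.backbone.type)            # SwinTransformer
print(cfg.model.roi_head.mask_head.type)  # FCNOccMaskHead for the occ_ variant below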
configs/_base_/models/occ_mask_rcnn_swin_fpn.py
ADDED
@@ -0,0 +1,127 @@
# model settings
model = dict(
    type='MaskRCNN',
    pretrained=None,
    backbone=dict(
        type='SwinTransformer',
        embed_dim=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4.,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        ape=False,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        use_checkpoint=False),
    neck=dict(
        type='FPN',
        in_channels=[96, 192, 384, 768],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNOccMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
configs/_base_/schedules/schedule_1x.py
ADDED
@@ -0,0 +1,11 @@
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
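In words, this 1x schedule ramps the learning rate linearly over the first 500 iterations (starting at 0.001 of the base value), then drops it by 10x at the step epochs 8 and 11 of the 12-epoch run. A tiny illustrative sketch of the step rule after warmup (not the actual scheduler, which mmcv's LrUpdaterHook handles; exact epoch-indexing conventions vary slightly between versions):

def step_lr(epoch, base_lr=0.02, steps=(8, 11), gamma=0.1):
    # learning rate after warmup, as a function of the 0-indexed epoch
    decays = sum(epoch >= s for s in steps)
    return base_lr * (gamma ** decays)

for e in (0, 7, 8, 11):
    print(e, round(step_lr(e), 6))  # 0.02, 0.02, 0.002, 0.0002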
configs/walt/walt_people.py
ADDED
@@ -0,0 +1,80 @@
_base_ = [
    '../_base_/models/occ_mask_rcnn_swin_fpn.py',
    '../_base_/datasets/walt_people.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

model = dict(
    backbone=dict(
        embed_dim=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        ape=False,
        drop_path_rate=0.1,
        patch_norm=True,
        use_checkpoint=False
    ),
    neck=dict(in_channels=[96, 192, 384, 768]))

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# augmentation strategy originates from DETR / Sparse RCNN
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='AutoAugment',
         policies=[
             [
                 dict(type='Resize',
                      img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
                                 (608, 1333), (640, 1333), (672, 1333), (704, 1333),
                                 (736, 1333), (768, 1333), (800, 1333)],
                      multiscale_mode='value',
                      keep_ratio=True)
             ],
             [
                 dict(type='Resize',
                      img_scale=[(400, 1333), (500, 1333), (600, 1333)],
                      multiscale_mode='value',
                      keep_ratio=True),
                 dict(type='RandomCrop',
                      crop_type='absolute_range',
                      crop_size=(384, 600),
                      allow_negative_crop=True),
                 dict(type='Resize',
                      img_scale=[(480, 1333), (512, 1333), (544, 1333),
                                 (576, 1333), (608, 1333), (640, 1333),
                                 (672, 1333), (704, 1333), (736, 1333),
                                 (768, 1333), (800, 1333)],
                      multiscale_mode='value',
                      override=True,
                      keep_ratio=True)
             ]
         ]),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
data = dict(train=dict(pipeline=train_pipeline))

optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))
lr_config = dict(step=[8, 11])
runner = dict(type='EpochBasedRunnerAmp', max_epochs=12)

# do not use mmdet version fp16
fp16 = None
optimizer_config = dict(
    type="DistOptimizerHook",
    update_interval=1,
    grad_clip=None,
    coalesce=True,
    bucket_size_mb=-1,
    use_fp16=True,
)
configs/walt/walt_vehicle.py
ADDED
@@ -0,0 +1,80 @@
_base_ = [
    '../_base_/models/occ_mask_rcnn_swin_fpn.py',
    '../_base_/datasets/walt_vehicle.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

model = dict(
    backbone=dict(
        embed_dim=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        ape=False,
        drop_path_rate=0.1,
        patch_norm=True,
        use_checkpoint=False
    ),
    neck=dict(in_channels=[96, 192, 384, 768]))

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# augmentation strategy originates from DETR / Sparse RCNN
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='AutoAugment',
         policies=[
             [
                 dict(type='Resize',
                      img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
                                 (608, 1333), (640, 1333), (672, 1333), (704, 1333),
                                 (736, 1333), (768, 1333), (800, 1333)],
                      multiscale_mode='value',
                      keep_ratio=True)
             ],
             [
                 dict(type='Resize',
                      img_scale=[(400, 1333), (500, 1333), (600, 1333)],
                      multiscale_mode='value',
                      keep_ratio=True),
                 dict(type='RandomCrop',
                      crop_type='absolute_range',
                      crop_size=(384, 600),
                      allow_negative_crop=True),
                 dict(type='Resize',
                      img_scale=[(480, 1333), (512, 1333), (544, 1333),
                                 (576, 1333), (608, 1333), (640, 1333),
                                 (672, 1333), (704, 1333), (736, 1333),
                                 (768, 1333), (800, 1333)],
                      multiscale_mode='value',
                      override=True,
                      keep_ratio=True)
             ]
         ]),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
data = dict(train=dict(pipeline=train_pipeline))

optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))
lr_config = dict(step=[8, 11])
runner = dict(type='EpochBasedRunnerAmp', max_epochs=12)

# do not use mmdet version fp16
fp16 = None
optimizer_config = dict(
    type="DistOptimizerHook",
    update_interval=1,
    grad_clip=None,
    coalesce=True,
    bucket_size_mb=-1,
    use_fp16=True,
)
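The Space itself drives these configs through infer.detections (see app.py above). As a rough alternative, a direct-inference sketch, assuming the vendored mmdet/apis in this commit keeps the standard init_detector / inference_detector helpers and that the checkpoint has been downloaded as described earlier:

from mmdet.apis import init_detector, inference_detector

config_file = 'configs/walt/walt_vehicle.py'
checkpoint = 'data/models/walt_vehicle.pth'

# build the model from the composed config and load the trained weights
model = init_detector(config_file, checkpoint, device='cpu')
result = inference_detector(model, 'demo/images/img_1.jpg')

# for Mask R-CNN style models the result is typically (bbox_results, segm_results),
# each a per-class list; the exact layout may differ in this amodal variant
bbox_results, segm_results = result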
cwalt/CWALT.py
ADDED
@@ -0,0 +1,161 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 19 19:14:47 2021

@author: dinesh
"""
import glob
from .utils import bb_intersection_over_union_unoccluded
import numpy as np
from PIL import Image
import datetime
import cv2
import os
from tqdm import tqdm


def get_image(time, folder):
    for week_loop in range(5):
        try:
            image = np.array(Image.open(folder+'/week' +str(week_loop)+'/'+ str(time).replace(' ','T').replace(':','-').split('+')[0] + '.jpg'))
            break
        except:
            continue
    if image is None:
        print('file not found')
    return image

def get_mask(segm, image):
    poly = np.array(segm).reshape((int(len(segm)/2), 2))
    mask = image.copy()*0
    cv2.fillConvexPoly(mask, poly, (255, 255, 255))
    return mask

def get_unoccluded(indices, tracks_all):
    unoccluded_indexes = []
    unoccluded_index_all =[]
    while 1:
        unoccluded_clusters = []
        len_unocc = len(unoccluded_indexes)
        for ind in indices:
            if ind in unoccluded_indexes:
                continue
            occ = False
            for ind_compare in indices:
                if ind_compare in unoccluded_indexes:
                    continue
                if bb_intersection_over_union_unoccluded(tracks_all[ind], tracks_all[ind_compare]) > 0.01 and ind_compare != ind:
                    occ = True
            if occ==False:
                unoccluded_indexes.extend([ind])
                unoccluded_clusters.extend([ind])
        if len(unoccluded_indexes) == len_unocc and len_unocc != 0:
            for ind in indices:
                if ind not in unoccluded_indexes:
                    unoccluded_indexes.extend([ind])
                    unoccluded_clusters.extend([ind])

        unoccluded_index_all.append(unoccluded_clusters)
        if len(unoccluded_indexes) > len(indices)-5:
            break
    return unoccluded_index_all

def primes(n): # simple sieve of multiples
    odds = range(3, n+1, 2)
    sieve = set(sum([list(range(q*q, n+1, q+q)) for q in odds], []))
    return [2] + [p for p in odds if p not in sieve]

def save_image(image_read, save_path, data, path):
    tracks = data['tracks_all_unoccluded']
    segmentations = data['segmentation_all_unoccluded']
    timestamps = data['timestamps_final_unoccluded']

    image = image_read.copy()
    indices = np.random.randint(len(tracks),size=30)
    prime_numbers = primes(1000)
    unoccluded_index_all = get_unoccluded(indices, tracks)

    # composite randomly sampled unoccluded instances onto the median background image
    mask_stacked = image*0
    mask_stacked_all =[]
    count = 0
    time = datetime.datetime.now()

    for l in indices:
        try:
            image_crop = get_image(timestamps[l], path)
        except:
            continue
        try:
            bb_left, bb_top, bb_width, bb_height, confidence = tracks[l]
        except:
            bb_left, bb_top, bb_width, bb_height, confidence, track_id = tracks[l]
        mask = get_mask(segmentations[l], image)

        image[mask > 0] = image_crop[mask > 0]
        mask[mask > 0] = 1
        # mark pixels where the new instance overlaps previously pasted ones
        for count, mask_inc in enumerate(mask_stacked_all):
            mask_stacked_all[count][cv2.bitwise_and(mask, mask_inc) > 0] = 2
        mask_stacked_all.append(mask)
        mask_stacked += mask
        count = count+1

    cv2.imwrite(save_path + '/images/'+str(time).replace(' ','T').replace(':','-').split('+')[0] + '.jpg', image[:, :, ::-1])
    cv2.imwrite(save_path + '/Segmentation/'+str(time).replace(' ','T').replace(':','-').split('+')[0] + '.jpg', mask_stacked[:, :, ::-1]*30)
    np.savez_compressed(save_path+'/Segmentation/'+str(time).replace(' ','T').replace(':','-').split('+')[0], mask=mask_stacked_all)

def CWALT_Generation(camera_name):
    save_path_train = 'data/cwalt_train'
    save_path_test = 'data/cwalt_test'

    json_file_path = 'data/{}/{}.json'.format(camera_name,camera_name) # iii1/iii1_7_test.json' # './data.json'
    path = 'data/' + camera_name

    data = np.load(json_file_path + '.npz', allow_pickle=True)

    ## split data

    data_train=dict()
    data_test=dict()

    split_index = int(len(data['timestamps_final_unoccluded'])*0.8)

    data_train['tracks_all_unoccluded'] = data['tracks_all_unoccluded'][0:split_index]
    data_train['segmentation_all_unoccluded'] = data['segmentation_all_unoccluded'][0:split_index]
    data_train['timestamps_final_unoccluded'] = data['timestamps_final_unoccluded'][0:split_index]

    data_test['tracks_all_unoccluded'] = data['tracks_all_unoccluded'][split_index:]
    data_test['segmentation_all_unoccluded'] = data['segmentation_all_unoccluded'][split_index:]
    data_test['timestamps_final_unoccluded'] = data['timestamps_final_unoccluded'][split_index:]

    image_read = np.array(Image.open(path + '/T18-median_image.jpg'))
    image_read = cv2.resize(image_read, (int(image_read.shape[1]/2), int(image_read.shape[0]/2)))

    try:
        os.mkdir(save_path_train)
    except:
        print(save_path_train)

    try:
        os.mkdir(save_path_train + '/images')
        os.mkdir(save_path_train + '/Segmentation')
    except:
        print(save_path_train+ '/images')

    try:
        os.mkdir(save_path_test)
    except:
        print(save_path_test)

    try:
        os.mkdir(save_path_test + '/images')
        os.mkdir(save_path_test + '/Segmentation')
    except:
        print(save_path_test+ '/images')

    for loop in tqdm(range(3000), desc="Generating training CWALT Images "):
        save_image(image_read, save_path_train, data_train, path)

    for loop in tqdm(range(300), desc="Generating testing CWALT Images "):
        save_image(image_read, save_path_test, data_test, path)
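The file list above also includes a 14-line cwalt_generate.py entry point whose body falls outside this 50-file view. A plausible driver sketch of how the two cwalt stages compose (hypothetical; the actual entry point may differ): stage 1 writes the <camera>.json.npz archive of unoccluded tracks, and stage 2 composites the CWALT training/testing images from it.

from cwalt.Clip_WALT_Generate import Get_unoccluded_objects
from cwalt.CWALT import CWALT_Generation

camera_name = 'cam2'                       # example camera folder under data/
Get_unoccluded_objects(camera_name)        # stage 1: filter unoccluded instances
CWALT_Generation(camera_name)              # stage 2: generate composited CWALT images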
cwalt/Clip_WALT_Generate.py
ADDED
@@ -0,0 +1,284 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri May 20 15:15:11 2022

@author: dinesh
"""

from collections import OrderedDict
from matplotlib import pyplot as plt
from .utils import *
import scipy.interpolate

from scipy import interpolate
from .clustering_utils import *
import glob
import cv2
from PIL import Image


import json
import cv2

import numpy as np
from tqdm import tqdm


def ignore_indexes(tracks_all, labels_all):
    # get repeating bounding boxes
    get_indexes = lambda x, xs: [i for (y, i) in zip(xs, range(len(xs))) if x == y]
    ignore_ind = []
    for index, track in enumerate(tracks_all):
        print('in ignore', index, len(tracks_all))
        if index in ignore_ind:
            continue

        if labels_all[index] < 1 or labels_all[index] > 3:
            ignore_ind.extend([index])

        ind = get_indexes(track, tracks_all)
        if len(ind) > 30:
            ignore_ind.extend(ind)

    return ignore_ind

def repeated_indexes_old(tracks_all,ignore_ind, unoccluded_indexes=None):
    # get repeating bounding boxes
    get_indexes = lambda x, xs: [i for (y, i) in zip(xs, range(len(xs))) if bb_intersection_over_union(x, y) > 0.8 and i not in ignore_ind]
    repeat_ind = []
    repeat_inds =[]
    if unoccluded_indexes == None:
        for index, track in enumerate(tracks_all):
            if index in repeat_ind or index in ignore_ind:
                continue
            ind = get_indexes(track, tracks_all)
            if len(ind) > 20:
                repeat_ind.extend(ind)
                repeat_inds.append([ind,track])
    else:
        for index in unoccluded_indexes:
            if index in repeat_ind or index in ignore_ind:
                continue
            ind = get_indexes(tracks_all[index], tracks_all)
            if len(ind) > 3:
                repeat_ind.extend(ind)
                repeat_inds.append([ind,tracks_all[index]])
    return repeat_inds

def get_unoccluded_instances(timestamps_final, tracks_all, ignore_ind=[], threshold = 0.01):
    get_indexes = lambda x, xs: [i for (y, i) in zip(xs, range(len(xs))) if x==y]
    unoccluded_indexes = []
    time_checked = []
    stationary_obj = []
    count =0

    for time in tqdm(np.unique(timestamps_final), desc="Detecting Unoccluded objects in Image "):
        count += 1
        if [time.year,time.month, time.day, time.hour, time.minute, time.second, time.microsecond] in time_checked:
            analyze_bb = []
            for ind in unoccluded_indexes_time:
                for ind_compare in same_time_instances:
                    iou = bb_intersection_over_union(tracks_all[ind], tracks_all[ind_compare])
                    if iou < 0.5 and iou > 0:
                        analyze_bb.extend([ind_compare])
                    if iou > 0.99:
                        stationary_obj.extend([str(ind_compare)+'+'+str(ind)])

            for ind in analyze_bb:
                occ = False
                for ind_compare in same_time_instances:
                    if bb_intersection_over_union_unoccluded(tracks_all[ind], tracks_all[ind_compare], threshold=threshold) > threshold and ind_compare != ind:
                        occ = True
                        break
                if occ == False:
                    unoccluded_indexes.extend([ind])
            continue

        same_time_instances = get_indexes(time,timestamps_final)
        unoccluded_indexes_time = []

        for ind in same_time_instances:
            if tracks_all[ind][4] < 0.9 or ind in ignore_ind:# or ind != 1859:
                continue
            occ = False
            for ind_compare in same_time_instances:
                if bb_intersection_over_union_unoccluded(tracks_all[ind], tracks_all[ind_compare], threshold=threshold) > threshold and ind_compare != ind and tracks_all[ind_compare][4] < 0.5:
                    occ = True
                    break
            if occ==False:
                unoccluded_indexes.extend([ind])
                unoccluded_indexes_time.extend([ind])
        time_checked.append([time.year,time.month, time.day, time.hour, time.minute, time.second, time.microsecond])
    return unoccluded_indexes,stationary_obj

def visualize_unoccluded_detection(timestamps_final,tracks_all,segmentation_all, unoccluded_indexes, cwalt_data_path, camera_name, ignore_ind=[]):
    tracks_final = []
    tracks_final.append([])
    try:
        os.mkdir(cwalt_data_path + '/' + camera_name+'_unoccluded_car_detection/')
    except:
        print('Unoccluded debugging exists')

    for time in tqdm(np.unique(timestamps_final), desc="Visualizing Unoccluded objects in Image "):
        get_indexes = lambda x, xs: [i for (y, i) in zip(xs, range(len(xs))) if x==y]
        ind = get_indexes(time, timestamps_final)
        image_unocc = False
        for index in ind:
            if index not in unoccluded_indexes:
                continue
            else:
                image_unocc = True
                break
        if image_unocc == False:
            continue

        for week_loop in range(5):
            try:
                image = np.array(Image.open(cwalt_data_path+'/week' +str(week_loop)+'/'+ str(time).replace(' ','T').replace(':','-').split('+')[0] + '.jpg'))
                break
            except:
                continue

        try:
            mask = image*0
        except:
            print('image not found for ' + str(time).replace(' ','T').replace(':','-').split('+')[0] + '.jpg' )
            continue
        image_original = image.copy()

        for index in ind:
            track = tracks_all[index]

            if index in ignore_ind:
                continue
            if index not in unoccluded_indexes:
                continue
            try:
                bb_left, bb_top, bb_width, bb_height, confidence, id = track
            except:
                bb_left, bb_top, bb_width, bb_height, confidence = track

            if confidence > 0.6:
                mask = poly_seg(image, segmentation_all[index])
                cv2.imwrite(cwalt_data_path + '/' + camera_name+'_unoccluded_car_detection/' + str(index)+'.png', mask[:, :, ::-1])

def repeated_indexes(tracks_all,ignore_ind, repeat_count = 10, unoccluded_indexes=None):
    get_indexes = lambda x, xs: [i for (y, i) in zip(xs, range(len(xs))) if bb_intersection_over_union(x, y) > 0.8 and i not in ignore_ind]
    repeat_ind = []
    repeat_inds =[]
    if unoccluded_indexes == None:
        for index, track in enumerate(tracks_all):
            if index in repeat_ind or index in ignore_ind:
                continue

            ind = get_indexes(track, tracks_all)
            if len(ind) > repeat_count:
                repeat_ind.extend(ind)
                repeat_inds.append([ind,track])
    else:
        for index in unoccluded_indexes:
            if index in repeat_ind or index in ignore_ind:
                continue
            ind = get_indexes(tracks_all[index], tracks_all)
            if len(ind) > repeat_count:
                repeat_ind.extend(ind)
                repeat_inds.append([ind,tracks_all[index]])


    return repeat_inds

def poly_seg(image, segm):
    poly = np.array(segm).reshape((int(len(segm)/2), 2))
    overlay = image.copy()
    alpha = 0.5
    cv2.fillPoly(overlay, [poly], color=(255, 255, 0))
    cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)
    return image

def visualize_unoccuded_clusters(repeat_inds, tracks, segmentation_all, timestamps_final, cwalt_data_path):
    for index_, repeat_ind in enumerate(repeat_inds):
        image = np.array(Image.open(cwalt_data_path+'/'+'T18-median_image.jpg'))
        try:
            os.mkdir(cwalt_data_path+ '/Cwalt_database/')
        except:
            print('folder exists')
        try:
            os.mkdir(cwalt_data_path+ '/Cwalt_database/' + str(index_) +'/')
        except:
            print(cwalt_data_path+ '/Cwalt_database/' + str(index_) +'/')

        for i in repeat_ind[0]:
            try:
                bb_left, bb_top, bb_width, bb_height, confidence = tracks[i]#bbox
            except:
                bb_left, bb_top, bb_width, bb_height, confidence, track_id = tracks[i]#bbox

            cv2.rectangle(image,(int(bb_left), int(bb_top)),(int(bb_left+bb_width), int(bb_top+bb_height)),(0, 0, 255), 2)
            time = timestamps_final[i]
            for week_loop in range(5):
                try:
                    image1 = np.array(Image.open(cwalt_data_path+'/week' +str(week_loop)+'/'+ str(time).replace(' ','T').replace(':','-').split('+')[0] + '.jpg'))
                    break
                except:
                    continue

            crop = image1[int(bb_top): int(bb_top + bb_height), int(bb_left):int(bb_left + bb_width)]
            cv2.imwrite(cwalt_data_path+ '/Cwalt_database/' + str(index_) +'/o_' + str(i) +'.jpg', crop[:, :, ::-1])
            image1 = poly_seg(image1,segmentation_all[i])
            crop = image1[int(bb_top): int(bb_top + bb_height), int(bb_left):int(bb_left + bb_width)]
            cv2.imwrite(cwalt_data_path+ '/Cwalt_database/' + str(index_) +'/' + str(i)+'.jpg', crop[:, :, ::-1])
        if index_ > 100:
            break

        cv2.imwrite(cwalt_data_path+ '/Cwalt_database/' + str(index_) +'.jpg', image[:, :, ::-1])

def Get_unoccluded_objects(camera_name, debug = False, scale=True):
    cwalt_data_path = 'data/' + camera_name
    data_folder = cwalt_data_path
    json_file_path = cwalt_data_path + '/' + camera_name + '.json'

    with open(json_file_path, 'r') as j:
        annotations = json.loads(j.read())

    tracks_all = [parse_bbox(anno['bbox']) for anno in annotations]
    segmentation_all = [parse_bbox(anno['segmentation']) for anno in annotations]
    labels_all = [anno['label_id'] for anno in annotations]
    timestamps_final = [parse(anno['time']) for anno in annotations]

    if scale ==True:
        scale_factor = 2
        tracks_all_numpy = np.array(tracks_all)
        tracks_all_numpy[:,:4] = np.array(tracks_all)[:,:4]/scale_factor
        tracks_all = tracks_all_numpy.tolist()

        segmentation_all_scaled = []
        for list_loop in segmentation_all:
            segmentation_all_scaled.append((np.floor_divide(np.array(list_loop),scale_factor)).tolist())
        segmentation_all = segmentation_all_scaled

    if debug == True:
        timestamps_final = timestamps_final[:1000]
        labels_all = labels_all[:1000]
        segmentation_all = segmentation_all[:1000]
        tracks_all = tracks_all[:1000]

    unoccluded_indexes, stationary = get_unoccluded_instances(timestamps_final, tracks_all, threshold = 0.05)
    if debug == True:
        visualize_unoccluded_detection(timestamps_final, tracks_all, segmentation_all, unoccluded_indexes, cwalt_data_path, camera_name)

    tracks_all_unoccluded = [tracks_all[i] for i in unoccluded_indexes]
    segmentation_all_unoccluded = [segmentation_all[i] for i in unoccluded_indexes]
    labels_all_unoccluded = [labels_all[i] for i in unoccluded_indexes]
    timestamps_final_unoccluded = [timestamps_final[i] for i in unoccluded_indexes]
    np.savez(json_file_path,tracks_all_unoccluded=tracks_all_unoccluded, segmentation_all_unoccluded=segmentation_all_unoccluded, labels_all_unoccluded=labels_all_unoccluded, timestamps_final_unoccluded=timestamps_final_unoccluded )

    if debug == True:
        repeat_inds_clusters = repeated_indexes(tracks_all_unoccluded,[], repeat_count=1)
        visualize_unoccuded_clusters(repeat_inds_clusters, tracks_all_unoccluded, segmentation_all_unoccluded, timestamps_final_unoccluded, cwalt_data_path)
    else:
        repeat_inds_clusters = repeated_indexes(tracks_all_unoccluded,[], repeat_count=10)

    np.savez(json_file_path + '_clubbed', repeat_inds=repeat_inds_clusters)
    np.savez(json_file_path + '_stationary', stationary=stationary)
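Both cwalt scripts lean on bb_intersection_over_union and bb_intersection_over_union_unoccluded from cwalt/utils.py, which is added in this commit but not shown in this view. For orientation only, a standard IoU for the [left, top, width, height, ...] track format used above would look roughly like the sketch below; the repo's own helpers may differ (the *_unoccluded variant, in particular, appears to measure overlap relative to a single box rather than the union):

def bb_iou_sketch(box_a, box_b):
    # boxes follow the tracks_all layout: (bb_left, bb_top, bb_width, bb_height, ...)
    ax1, ay1, aw, ah = box_a[:4]
    bx1, by1, bw, bh = box_b[:4]
    ix1, iy1 = max(ax1, bx1), max(ay1, by1)
    ix2, iy2 = min(ax1 + aw, bx1 + bw), min(ay1 + ah, by1 + bh)
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    union = aw * ah + bw * bh - inter
    return inter / union if union > 0 else 0.0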
cwalt/Download_Detections.py
ADDED
@@ -0,0 +1,28 @@
import json
from psycopg2.extras import RealDictCursor
#import cv2
import psycopg2
import cv2


CONNECTION = "postgres://postgres:"

conn = psycopg2.connect(CONNECTION)
cursor = conn.cursor(cursor_factory=RealDictCursor)


def get_sample():
    camera_name, camera_id = 'cam2', 4

    print('Executing SQL command')

    cursor.execute("SELECT * FROM annotations WHERE camera_id = {} and time >='2021-05-01 00:00:00' and time <='2021-05-07 23:59:50' and label_id in (1,2)".format(camera_id))

    print('Dumping to json')
    annotations = json.dumps(cursor.fetchall(), indent=2, default=str)
    wjdata = json.loads(annotations)
    with open('{}_{}_test.json'.format(camera_name, camera_id), 'w') as f:
        json.dump(wjdata, f)
    print('Done dumping to json')

get_sample()
cwalt/clustering_utils.py
ADDED
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri May 20 15:18:20 2022
+
+@author: dinesh
+"""
+
+# 0 - Import related libraries
+
+import urllib
+import zipfile
+import os
+import scipy.io
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+from scipy.spatial.distance import directed_hausdorff
+from sklearn.cluster import DBSCAN
+from sklearn.metrics.pairwise import pairwise_distances
+import scipy.spatial.distance
+
+from .kmedoid import kMedoids  # kMedoids code is adapted from https://github.com/letiantian/kmedoids
+
+# Some visualization stuff, not so important
+# sns.set()
+plt.rcParams['figure.figsize'] = (12, 12)
+
+# Utility Functions
+
+color_lst = plt.rcParams['axes.prop_cycle'].by_key()['color']
+color_lst.extend(['firebrick', 'olive', 'indigo', 'khaki', 'teal', 'saddlebrown',
+                  'skyblue', 'coral', 'darkorange', 'lime', 'darkorchid', 'dimgray'])
+
+
+def plot_cluster(image, traj_lst, cluster_lst):
+    '''
+    Plots given trajectories with a color that is specific for every trajectory's own cluster index.
+    Outlier trajectories which are specified with -1 in `cluster_lst` are plotted dashed with black color
+    '''
+    cluster_count = np.max(cluster_lst) + 1
+
+    for traj, cluster in zip(traj_lst, cluster_lst):
+
+        # if cluster == -1:
+        #     # Means it it a noisy trajectory, paint it black
+        #     plt.plot(traj[:, 0], traj[:, 1], c='k', linestyle='dashed')
+        #
+        # else:
+        plt.plot(traj[:, 0], traj[:, 1], c=color_lst[cluster % len(color_lst)])
+
+    plt.imshow(image)
+    # plt.show()
+    plt.axis('off')
+    plt.savefig('trajectory.png', bbox_inches='tight')
+    plt.show()
+
+
+# 3 - Distance matrix
+
+def hausdorff(u, v):
+    d = max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0])
+    return d
+
+
+def build_distance_matrix(traj_lst):
+    # 2 - Trajectory segmentation
+
+    print('Running trajectory segmentation...')
+    degree_threshold = 5
+
+    for traj_index, traj in enumerate(traj_lst):
+
+        hold_index_lst = []
+        previous_azimuth = 1000
+
+        for point_index, point in enumerate(traj[:-1]):
+            next_point = traj[point_index + 1]
+            diff_vector = next_point - point
+            azimuth = (math.degrees(math.atan2(*diff_vector)) + 360) % 360
+
+            if abs(azimuth - previous_azimuth) > degree_threshold:
+                hold_index_lst.append(point_index)
+                previous_azimuth = azimuth
+        hold_index_lst.append(traj.shape[0] - 1)  # Last point of trajectory is always added
+
+        traj_lst[traj_index] = traj[hold_index_lst, :]
+
+    print('Building distance matrix...')
+    traj_count = len(traj_lst)
+    D = np.zeros((traj_count, traj_count))
+
+    # This may take a while
+    for i in range(traj_count):
+        if i % 20 == 0:
+            print(i)
+        for j in range(i + 1, traj_count):
+            distance = hausdorff(traj_lst[i], traj_lst[j])
+            D[i, j] = distance
+            D[j, i] = distance
+
+    return D
+
+
+def run_kmedoids(image, traj_lst, D):
+    # 4 - Different clustering methods
+
+    # 4.1 - kmedoids
+
+    traj_count = len(traj_lst)
+
+    k = 3  # The number of clusters
+    medoid_center_lst, cluster2index_lst = kMedoids(D, k)
+
+    cluster_lst = np.empty((traj_count,), dtype=int)
+
+    for cluster in cluster2index_lst:
+        cluster_lst[cluster2index_lst[cluster]] = cluster
+
+    plot_cluster(image, traj_lst, cluster_lst)
+
+
+def run_dbscan(image, traj_lst, D):
+    mdl = DBSCAN(eps=400, min_samples=10)
+    cluster_lst = mdl.fit_predict(D)
+
+    plot_cluster(image, traj_lst, cluster_lst)
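For reference, a minimal sketch of how these helpers chain together; the trajectories and background image below are synthetic stand-ins for illustration, not WALT data:

    import numpy as np
    from cwalt.clustering_utils import build_distance_matrix, run_kmedoids, run_dbscan

    rng = np.random.default_rng(0)
    # three synthetic (N, 2) pixel trajectories drifting in different directions
    traj_lst = [np.cumsum(rng.normal(size=(50, 2)), axis=0) + 100.0 * i for i in range(3)]
    image = np.zeros((500, 500, 3), dtype=np.uint8)  # stand-in background frame

    D = build_distance_matrix(traj_lst)  # segments trajectories in place, then pairwise Hausdorff distances
    run_kmedoids(image, traj_lst, D)     # k = 3 medoid clusters drawn over the image
    # run_dbscan(image, traj_lst, D)     # density-based alternative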
cwalt/kmedoid.py
ADDED
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri May 20 15:18:56 2022
+
+@author: dinesh
+"""
+
+import numpy as np
+import math
+
+def kMedoids(D, k, tmax=100):
+    # determine dimensions of distance matrix D
+    m, n = D.shape
+
+    np.fill_diagonal(D, math.inf)
+
+    if k > n:
+        raise Exception('too many medoids')
+    # randomly initialize an array of k medoid indices
+    M = np.arange(n)
+    np.random.shuffle(M)
+    M = np.sort(M[:k])
+
+    # create a copy of the array of medoid indices
+    Mnew = np.copy(M)
+
+    # initialize a dictionary to represent clusters
+    C = {}
+    for t in range(tmax):
+        # determine clusters, i. e. arrays of data indices
+        J = np.argmin(D[:, M], axis=1)
+
+        for kappa in range(k):
+            C[kappa] = np.where(J == kappa)[0]
+        # update cluster medoids
+        for kappa in range(k):
+            J = np.mean(D[np.ix_(C[kappa], C[kappa])], axis=1)
+            j = np.argmin(J)
+            Mnew[kappa] = C[kappa][j]
+        np.sort(Mnew)
+        # check for convergence
+        if np.array_equal(M, Mnew):
+            break
+        M = np.copy(Mnew)
+    else:
+        # final update of cluster memberships
+        J = np.argmin(D[:, M], axis=1)
+        for kappa in range(k):
+            C[kappa] = np.where(J == kappa)[0]
+
+    np.fill_diagonal(D, 0)
+
+    # return results
+    return M, C
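A small self-contained sketch of `kMedoids` on a toy distance matrix (the six points below are invented purely for illustration):

    import numpy as np
    from cwalt.kmedoid import kMedoids

    points = np.array([[0, 0], [0, 1], [1, 0], [10, 10], [10, 11], [11, 10]], dtype=float)
    D = np.linalg.norm(points[:, None, :] - points[None, :, :], axis=-1)  # pairwise Euclidean distances

    medoids, clusters = kMedoids(D, k=2)
    print(medoids)   # indices of the two medoid points
    print(clusters)  # dict mapping cluster index -> array of member indices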
cwalt/utils.py
ADDED
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri May 20 15:16:56 2022
+
+@author: dinesh
+"""
+
+import json
+import cv2
+from PIL import Image
+import numpy as np
+from dateutil.parser import parse
+
+def bb_intersection_over_union(box1, box2):
+    #print(box1, box2)
+    boxA = box1.copy()
+    boxB = box2.copy()
+    boxA[2] = boxA[0] + boxA[2]
+    boxA[3] = boxA[1] + boxA[3]
+    boxB[2] = boxB[0] + boxB[2]
+    boxB[3] = boxB[1] + boxB[3]
+    # determine the (x, y)-coordinates of the intersection rectangle
+    xA = max(boxA[0], boxB[0])
+    yA = max(boxA[1], boxB[1])
+    xB = min(boxA[2], boxB[2])
+    yB = min(boxA[3], boxB[3])
+
+    # compute the area of intersection rectangle
+    interArea = abs(max((xB - xA, 0)) * max((yB - yA), 0))
+
+    if interArea == 0:
+        return 0
+    # compute the area of both the prediction and ground-truth
+    # rectangles
+    boxAArea = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
+    boxBArea = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))
+
+    # compute the intersection over union by taking the intersection
+    # area and dividing it by the sum of prediction + ground-truth
+    # areas - the interesection area
+    iou = interArea / float(boxAArea + boxBArea - interArea)
+    return iou
+
+def bb_intersection_over_union_unoccluded(box1, box2, threshold=0.01):
+    #print(box1, box2)
+    boxA = box1.copy()
+    boxB = box2.copy()
+    boxA[2] = boxA[0] + boxA[2]
+    boxA[3] = boxA[1] + boxA[3]
+    boxB[2] = boxB[0] + boxB[2]
+    boxB[3] = boxB[1] + boxB[3]
+    # determine the (x, y)-coordinates of the intersection rectangle
+    xA = max(boxA[0], boxB[0])
+    yA = max(boxA[1], boxB[1])
+    xB = min(boxA[2], boxB[2])
+    yB = min(boxA[3], boxB[3])
+
+    # compute the area of intersection rectangle
+    interArea = abs(max((xB - xA, 0)) * max((yB - yA), 0))
+
+    if interArea == 0:
+        return 0
+    # compute the area of both the prediction and ground-truth
+    # rectangles
+    boxAArea = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
+    boxBArea = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))
+
+    # compute the intersection over union by taking the intersection
+    # area and dividing it by the sum of prediction + ground-truth
+    # areas - the interesection area
+    iou = interArea / float(boxAArea + boxBArea - interArea)
+
+    #print(iou)
+    # return the intersection over union value
+    occlusion = False
+    if iou > threshold and iou < 1:
+        #print(boxA[3], boxB[3], boxB[1])
+        if boxA[3] < boxB[3]:  # and boxA[3] > boxB[1]:
+            if boxB[2] > boxA[0]:  # and boxB[2] < boxA[2]:
+                #print('first', (boxB[2] - boxA[0])/(boxA[2] - boxA[0]))
+                if (min(boxB[2], boxA[2]) - boxA[0]) / (boxA[2] - boxA[0]) > threshold:
+                    occlusion = True
+
+            if boxB[0] < boxA[2]:  # boxB[0] > boxA[0] and
+                #print('second', (boxA[2] - boxB[0])/(boxA[2] - boxA[0]))
+                if (boxA[2] - max(boxB[0], boxA[0])) / (boxA[2] - boxA[0]) > threshold:
+                    occlusion = True
+    if occlusion == False:
+        iou = iou * 0
+        #asas
+    # asas
+    #iou = 0.9 #iou*0
+    #print(box1, box2, iou, occlusion)
+    return iou
+
+def draw_tracks(image, tracks):
+    """
+    Draw on input image.
+
+    Args:
+        image (numpy.ndarray): image
+        tracks (list): list of tracks to be drawn on the image.
+
+    Returns:
+        numpy.ndarray: image with the track-ids drawn on it.
+    """
+
+    for trk in tracks:
+
+        trk_id = trk[1]
+        xmin = trk[2]
+        ymin = trk[3]
+        width = trk[4]
+        height = trk[5]
+
+        xcentroid, ycentroid = int(xmin + 0.5 * width), int(ymin + 0.5 * height)
+
+        text = "ID {}".format(trk_id)
+
+        cv2.putText(image, text, (xcentroid - 10, ycentroid - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+        cv2.circle(image, (xcentroid, ycentroid), 4, (0, 255, 0), -1)
+
+    return image
+
+
+def draw_bboxes(image, tracks):
+    """
+    Draw the bounding boxes about detected objects in the image.
+
+    Args:
+        image (numpy.ndarray): Image or video frame.
+        bboxes (numpy.ndarray): Bounding boxes pixel coordinates as (xmin, ymin, width, height)
+        confidences (numpy.ndarray): Detection confidence or detection probability.
+        class_ids (numpy.ndarray): Array containing class ids (aka label ids) of each detected object.
+
+    Returns:
+        numpy.ndarray: image with the bounding boxes drawn on it.
+    """
+
+    for trk in tracks:
+        xmin = int(trk[2])
+        ymin = int(trk[3])
+        width = int(trk[4])
+        height = int(trk[5])
+        clr = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
+        cv2.rectangle(image, (xmin, ymin), (xmin + width, ymin + height), clr, 2)
+
+    return image
+
+
+def num(v):
+    number_as_float = float(v)
+    number_as_int = int(number_as_float)
+    return number_as_int if number_as_float == number_as_int else number_as_float
+
+
+def parse_bbox(bbox_str):
+    bbox_list = bbox_str.strip('{').strip('}').split(',')
+    bbox_list = [num(elem) for elem in bbox_list]
+    return bbox_list
+
+def parse_seg(bbox_str):
+    bbox_list = bbox_str.strip('{').strip('}').split(',')
+    bbox_list = [num(elem) for elem in bbox_list]
+    ret = bbox_list  # []
+    # for i in range(0, len(bbox_list) - 1, 2):
+    #     ret.append((bbox_list[i], bbox_list[i + 1]))
+    return ret
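`bb_intersection_over_union` takes boxes in `(x, y, width, height)` form and converts them to corner coordinates internally; a quick worked example:

    from cwalt.utils import bb_intersection_over_union

    box_a = [0, 0, 10, 10]
    box_b = [5, 5, 10, 10]
    # intersection 5 * 5 = 25, union 100 + 100 - 25 = 175
    print(bb_intersection_over_union(box_a, box_b))  # 0.1428...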
cwalt_generate.py
ADDED
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Jun 4 16:55:58 2022
+
+@author: dinesh
+"""
+from cwalt.CWALT import CWALT_Generation
+from cwalt.Clip_WALT_Generate import Get_unoccluded_objects
+
+if __name__ == '__main__':
+    camera_name = 'cam2'
+    Get_unoccluded_objects(camera_name)
+    CWALT_Generation(camera_name)
docker/Dockerfile
ADDED
@@ -0,0 +1,52 @@
+ARG PYTORCH="1.9.0"
+ARG CUDA="11.1"
+ARG CUDNN="8"
+
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+
+ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
+ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
+RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install MMCV
+#RUN pip install mmcv-full==1.3.8 -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
+# -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html
+RUN pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
+# Install MMDetection
+RUN conda clean --all
+RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection
+WORKDIR /mmdetection
+ENV FORCE_CUDA="1"
+RUN cd /mmdetection && git checkout 7bd39044f35aec4b90dd797b965777541a8678ff
+RUN pip install -r requirements/build.txt
+RUN pip install --no-cache-dir -e .
+RUN apt-get update
+RUN apt-get install -y vim
+RUN pip uninstall -y pycocotools
+RUN pip install mmpycocotools timm scikit-image imagesize
+
+
+# make sure we don't overwrite some existing directory called "apex"
+WORKDIR /tmp/unique_for_apex
+# uninstall Apex if present, twice to make absolutely sure :)
+RUN pip uninstall -y apex || :
+RUN pip uninstall -y apex || :
+# SHA is something the user can touch to force recreation of this Docker layer,
+# and therefore force cloning of the latest version of Apex
+RUN SHA=ToUcHMe git clone https://github.com/NVIDIA/apex.git
+WORKDIR /tmp/unique_for_apex/apex
+RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
+RUN pip install seaborn sklearn imantics gradio
+WORKDIR /code
+ENTRYPOINT ["python", "app.py"]
+
+#RUN git clone https://github.com/NVIDIA/apex
+#RUN cd apex
+#RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
+#RUN pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
github_vis/cwalt.gif
ADDED
github_vis/vis_cars.gif
ADDED
github_vis/vis_people.gif
ADDED
infer.py
ADDED
@@ -0,0 +1,118 @@
+from argparse import ArgumentParser
+
+from mmdet.apis import inference_detector, init_detector, show_result_pyplot
+from mmdet.core.mask.utils import encode_mask_results
+import numpy as np
+import mmcv
+import torch
+from imantics import Polygons, Mask
+import json
+import os
+import cv2, glob
+
+class detections():
+    def __init__(self, cfg_path, device, model_path='data/models/walt_vehicle.pth', threshold=0.85):
+        self.model = init_detector(cfg_path, model_path, device=device)
+        self.all_preds = []
+        self.all_scores = []
+        self.index = []
+        self.score_thr = threshold
+        self.result = []
+        self.record_dict = {'model': cfg_path, 'results': []}
+        self.detect_count = []
+
+
+    def run_on_image(self, image):
+        self.result = inference_detector(self.model, image)
+        image_labelled = self.model.show_result(image, self.result, score_thr=self.score_thr)
+        return image_labelled
+
+    def process_output(self, count):
+        result = self.result
+        infer_result = {'url': count,
+                        'boxes': [],
+                        'scores': [],
+                        'keypoints': [],
+                        'segmentation': [],
+                        'label_ids': [],
+                        'track': [],
+                        'labels': []}
+
+        if isinstance(result, tuple):
+            bbox_result, segm_result = result
+            #segm_result = encode_mask_results(segm_result)
+            if isinstance(segm_result, tuple):
+                segm_result = segm_result[0]  # ms rcnn
+        bboxes = np.vstack(bbox_result)
+        labels = [np.full(bbox.shape[0], i, dtype=np.int32) for i, bbox in enumerate(bbox_result)]
+
+        labels = np.concatenate(labels)
+        segms = None
+        if segm_result is not None and len(labels) > 0:  # non empty
+            segms = mmcv.concat_list(segm_result)
+            if isinstance(segms[0], torch.Tensor):
+                segms = torch.stack(segms, dim=0).detach().cpu().numpy()
+            else:
+                segms = np.stack(segms, axis=0)
+
+        for i, (bbox, label, segm) in enumerate(zip(bboxes, labels, segms)):
+            if bbox[-1].item() < 0.3:
+                continue
+            box = [bbox[0].item(), bbox[1].item(), bbox[2].item(), bbox[3].item()]
+            polygons = Mask(segm).polygons()
+
+            infer_result['boxes'].append(box)
+            infer_result['segmentation'].append(polygons.segmentation)
+            infer_result['scores'].append(bbox[-1].item())
+            infer_result['labels'].append(self.model.CLASSES[label])
+            infer_result['label_ids'].append(label)
+        self.record_dict['results'].append(infer_result)
+        self.detect_count = labels
+
+    def write_json(self, filename):
+        with open(filename + '.json', 'w') as f:
+            json.dump(self.record_dict, f)
+
+
+def main():
+    if torch.cuda.is_available() == False:
+        device = 'cpu'
+    else:
+        device = 'cuda:0'
+    detect_people = detections('configs/walt/walt_people.py', device, model_path='data/models/walt_people.pth')
+    detect = detections('configs/walt/walt_vehicle.py', device, model_path='data/models/walt_vehicle.pth')
+    filenames = sorted(glob.glob('demo/images/*'))
+    count = 0
+    for filename in filenames:
+        img = cv2.imread(filename)
+        try:
+            img = detect_people.run_on_image(img)
+            img = detect.run_on_image(img)
+        except:
+            continue
+        count = count + 1
+
+        try:
+            import os
+            os.makedirs(os.path.dirname(filename.replace('demo', 'demo/results/')))
+            os.mkdirs(os.path.dirname(filename))
+        except:
+            print('done')
+        cv2.imwrite(filename.replace('demo', 'demo/results/'), img)
+        if count == 30000:
+            break
+        try:
+            detect.process_output(count)
+        except:
+            continue
+    '''
+
+    np.savez('FC', a= detect.record_dict)
+    with open('check.json', 'w') as f:
+        json.dump(detect.record_dict, f)
+    detect.write_json('seq3')
+    asas
+    detect.process_output(0)
+    '''
+if __name__ == "__main__":
+    main()
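The `detections` wrapper can also be driven outside `main`; a minimal sketch, assuming the WALT config and checkpoint paths used above are present and that `frame.jpg` is a hypothetical input image:

    import cv2
    from infer import detections

    detector = detections('configs/walt/walt_vehicle.py', 'cpu',
                          model_path='data/models/walt_vehicle.pth')
    frame = cv2.imread('frame.jpg')            # hypothetical input image
    labelled = detector.run_on_image(frame)    # draws boxes/masks above score_thr
    detector.process_output(0)                 # append raw boxes/polygons to record_dict
    detector.write_json('frame_detections')    # writes frame_detections.json
    cv2.imwrite('frame_labelled.jpg', labelled)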
mmcv_custom/__init__.py
ADDED
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+from .checkpoint import load_checkpoint
+
+__all__ = ['load_checkpoint']
mmcv_custom/checkpoint.py
ADDED
@@ -0,0 +1,500 @@
1 |
+
# Copyright (c) Open-MMLab. All rights reserved.
|
2 |
+
import io
|
3 |
+
import os
|
4 |
+
import os.path as osp
|
5 |
+
import pkgutil
|
6 |
+
import time
|
7 |
+
import warnings
|
8 |
+
from collections import OrderedDict
|
9 |
+
from importlib import import_module
|
10 |
+
from tempfile import TemporaryDirectory
|
11 |
+
|
12 |
+
import torch
|
13 |
+
import torchvision
|
14 |
+
from torch.optim import Optimizer
|
15 |
+
from torch.utils import model_zoo
|
16 |
+
from torch.nn import functional as F
|
17 |
+
|
18 |
+
import mmcv
|
19 |
+
from mmcv.fileio import FileClient
|
20 |
+
from mmcv.fileio import load as load_file
|
21 |
+
from mmcv.parallel import is_module_wrapper
|
22 |
+
from mmcv.utils import mkdir_or_exist
|
23 |
+
from mmcv.runner import get_dist_info
|
24 |
+
|
25 |
+
ENV_MMCV_HOME = 'MMCV_HOME'
|
26 |
+
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
|
27 |
+
DEFAULT_CACHE_DIR = '~/.cache'
|
28 |
+
|
29 |
+
|
30 |
+
def _get_mmcv_home():
|
31 |
+
mmcv_home = os.path.expanduser(
|
32 |
+
os.getenv(
|
33 |
+
ENV_MMCV_HOME,
|
34 |
+
os.path.join(
|
35 |
+
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')))
|
36 |
+
|
37 |
+
mkdir_or_exist(mmcv_home)
|
38 |
+
return mmcv_home
|
39 |
+
|
40 |
+
|
41 |
+
def load_state_dict(module, state_dict, strict=False, logger=None):
|
42 |
+
"""Load state_dict to a module.
|
43 |
+
|
44 |
+
This method is modified from :meth:`torch.nn.Module.load_state_dict`.
|
45 |
+
Default value for ``strict`` is set to ``False`` and the message for
|
46 |
+
param mismatch will be shown even if strict is False.
|
47 |
+
|
48 |
+
Args:
|
49 |
+
module (Module): Module that receives the state_dict.
|
50 |
+
state_dict (OrderedDict): Weights.
|
51 |
+
strict (bool): whether to strictly enforce that the keys
|
52 |
+
in :attr:`state_dict` match the keys returned by this module's
|
53 |
+
:meth:`~torch.nn.Module.state_dict` function. Default: ``False``.
|
54 |
+
logger (:obj:`logging.Logger`, optional): Logger to log the error
|
55 |
+
message. If not specified, print function will be used.
|
56 |
+
"""
|
57 |
+
unexpected_keys = []
|
58 |
+
all_missing_keys = []
|
59 |
+
err_msg = []
|
60 |
+
|
61 |
+
metadata = getattr(state_dict, '_metadata', None)
|
62 |
+
state_dict = state_dict.copy()
|
63 |
+
if metadata is not None:
|
64 |
+
state_dict._metadata = metadata
|
65 |
+
|
66 |
+
# use _load_from_state_dict to enable checkpoint version control
|
67 |
+
def load(module, prefix=''):
|
68 |
+
# recursively check parallel module in case that the model has a
|
69 |
+
# complicated structure, e.g., nn.Module(nn.Module(DDP))
|
70 |
+
if is_module_wrapper(module):
|
71 |
+
module = module.module
|
72 |
+
local_metadata = {} if metadata is None else metadata.get(
|
73 |
+
prefix[:-1], {})
|
74 |
+
module._load_from_state_dict(state_dict, prefix, local_metadata, True,
|
75 |
+
all_missing_keys, unexpected_keys,
|
76 |
+
err_msg)
|
77 |
+
for name, child in module._modules.items():
|
78 |
+
if child is not None:
|
79 |
+
load(child, prefix + name + '.')
|
80 |
+
|
81 |
+
load(module)
|
82 |
+
load = None # break load->load reference cycle
|
83 |
+
|
84 |
+
# ignore "num_batches_tracked" of BN layers
|
85 |
+
missing_keys = [
|
86 |
+
key for key in all_missing_keys if 'num_batches_tracked' not in key
|
87 |
+
]
|
88 |
+
|
89 |
+
if unexpected_keys:
|
90 |
+
err_msg.append('unexpected key in source '
|
91 |
+
f'state_dict: {", ".join(unexpected_keys)}\n')
|
92 |
+
if missing_keys:
|
93 |
+
err_msg.append(
|
94 |
+
f'missing keys in source state_dict: {", ".join(missing_keys)}\n')
|
95 |
+
|
96 |
+
rank, _ = get_dist_info()
|
97 |
+
if len(err_msg) > 0 and rank == 0:
|
98 |
+
err_msg.insert(
|
99 |
+
0, 'The model and loaded state dict do not match exactly\n')
|
100 |
+
err_msg = '\n'.join(err_msg)
|
101 |
+
if strict:
|
102 |
+
raise RuntimeError(err_msg)
|
103 |
+
elif logger is not None:
|
104 |
+
logger.warning(err_msg)
|
105 |
+
else:
|
106 |
+
print(err_msg)
|
107 |
+
|
108 |
+
|
109 |
+
def load_url_dist(url, model_dir=None):
|
110 |
+
"""In distributed setting, this function only download checkpoint at local
|
111 |
+
rank 0."""
|
112 |
+
rank, world_size = get_dist_info()
|
113 |
+
rank = int(os.environ.get('LOCAL_RANK', rank))
|
114 |
+
if rank == 0:
|
115 |
+
checkpoint = model_zoo.load_url(url, model_dir=model_dir)
|
116 |
+
if world_size > 1:
|
117 |
+
torch.distributed.barrier()
|
118 |
+
if rank > 0:
|
119 |
+
checkpoint = model_zoo.load_url(url, model_dir=model_dir)
|
120 |
+
return checkpoint
|
121 |
+
|
122 |
+
|
123 |
+
def load_pavimodel_dist(model_path, map_location=None):
|
124 |
+
"""In distributed setting, this function only download checkpoint at local
|
125 |
+
rank 0."""
|
126 |
+
try:
|
127 |
+
from pavi import modelcloud
|
128 |
+
except ImportError:
|
129 |
+
raise ImportError(
|
130 |
+
'Please install pavi to load checkpoint from modelcloud.')
|
131 |
+
rank, world_size = get_dist_info()
|
132 |
+
rank = int(os.environ.get('LOCAL_RANK', rank))
|
133 |
+
if rank == 0:
|
134 |
+
model = modelcloud.get(model_path)
|
135 |
+
with TemporaryDirectory() as tmp_dir:
|
136 |
+
downloaded_file = osp.join(tmp_dir, model.name)
|
137 |
+
model.download(downloaded_file)
|
138 |
+
checkpoint = torch.load(downloaded_file, map_location=map_location)
|
139 |
+
if world_size > 1:
|
140 |
+
torch.distributed.barrier()
|
141 |
+
if rank > 0:
|
142 |
+
model = modelcloud.get(model_path)
|
143 |
+
with TemporaryDirectory() as tmp_dir:
|
144 |
+
downloaded_file = osp.join(tmp_dir, model.name)
|
145 |
+
model.download(downloaded_file)
|
146 |
+
checkpoint = torch.load(
|
147 |
+
downloaded_file, map_location=map_location)
|
148 |
+
return checkpoint
|
149 |
+
|
150 |
+
|
151 |
+
def load_fileclient_dist(filename, backend, map_location):
|
152 |
+
"""In distributed setting, this function only download checkpoint at local
|
153 |
+
rank 0."""
|
154 |
+
rank, world_size = get_dist_info()
|
155 |
+
rank = int(os.environ.get('LOCAL_RANK', rank))
|
156 |
+
allowed_backends = ['ceph']
|
157 |
+
if backend not in allowed_backends:
|
158 |
+
raise ValueError(f'Load from Backend {backend} is not supported.')
|
159 |
+
if rank == 0:
|
160 |
+
fileclient = FileClient(backend=backend)
|
161 |
+
buffer = io.BytesIO(fileclient.get(filename))
|
162 |
+
checkpoint = torch.load(buffer, map_location=map_location)
|
163 |
+
if world_size > 1:
|
164 |
+
torch.distributed.barrier()
|
165 |
+
if rank > 0:
|
166 |
+
fileclient = FileClient(backend=backend)
|
167 |
+
buffer = io.BytesIO(fileclient.get(filename))
|
168 |
+
checkpoint = torch.load(buffer, map_location=map_location)
|
169 |
+
return checkpoint
|
170 |
+
|
171 |
+
|
172 |
+
def get_torchvision_models():
|
173 |
+
model_urls = dict()
|
174 |
+
for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__):
|
175 |
+
if ispkg:
|
176 |
+
continue
|
177 |
+
_zoo = import_module(f'torchvision.models.{name}')
|
178 |
+
if hasattr(_zoo, 'model_urls'):
|
179 |
+
_urls = getattr(_zoo, 'model_urls')
|
180 |
+
model_urls.update(_urls)
|
181 |
+
return model_urls
|
182 |
+
|
183 |
+
|
184 |
+
def get_external_models():
|
185 |
+
mmcv_home = _get_mmcv_home()
|
186 |
+
default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json')
|
187 |
+
default_urls = load_file(default_json_path)
|
188 |
+
assert isinstance(default_urls, dict)
|
189 |
+
external_json_path = osp.join(mmcv_home, 'open_mmlab.json')
|
190 |
+
if osp.exists(external_json_path):
|
191 |
+
external_urls = load_file(external_json_path)
|
192 |
+
assert isinstance(external_urls, dict)
|
193 |
+
default_urls.update(external_urls)
|
194 |
+
|
195 |
+
return default_urls
|
196 |
+
|
197 |
+
|
198 |
+
def get_mmcls_models():
|
199 |
+
mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json')
|
200 |
+
mmcls_urls = load_file(mmcls_json_path)
|
201 |
+
|
202 |
+
return mmcls_urls
|
203 |
+
|
204 |
+
|
205 |
+
def get_deprecated_model_names():
|
206 |
+
deprecate_json_path = osp.join(mmcv.__path__[0],
|
207 |
+
'model_zoo/deprecated.json')
|
208 |
+
deprecate_urls = load_file(deprecate_json_path)
|
209 |
+
assert isinstance(deprecate_urls, dict)
|
210 |
+
|
211 |
+
return deprecate_urls
|
212 |
+
|
213 |
+
|
214 |
+
def _process_mmcls_checkpoint(checkpoint):
|
215 |
+
state_dict = checkpoint['state_dict']
|
216 |
+
new_state_dict = OrderedDict()
|
217 |
+
for k, v in state_dict.items():
|
218 |
+
if k.startswith('backbone.'):
|
219 |
+
new_state_dict[k[9:]] = v
|
220 |
+
new_checkpoint = dict(state_dict=new_state_dict)
|
221 |
+
|
222 |
+
return new_checkpoint
|
223 |
+
|
224 |
+
|
225 |
+
def _load_checkpoint(filename, map_location=None):
|
226 |
+
"""Load checkpoint from somewhere (modelzoo, file, url).
|
227 |
+
|
228 |
+
Args:
|
229 |
+
filename (str): Accept local filepath, URL, ``torchvision://xxx``,
|
230 |
+
``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
|
231 |
+
details.
|
232 |
+
map_location (str | None): Same as :func:`torch.load`. Default: None.
|
233 |
+
|
234 |
+
Returns:
|
235 |
+
dict | OrderedDict: The loaded checkpoint. It can be either an
|
236 |
+
OrderedDict storing model weights or a dict containing other
|
237 |
+
information, which depends on the checkpoint.
|
238 |
+
"""
|
239 |
+
if filename.startswith('modelzoo://'):
|
240 |
+
warnings.warn('The URL scheme of "modelzoo://" is deprecated, please '
|
241 |
+
'use "torchvision://" instead')
|
242 |
+
model_urls = get_torchvision_models()
|
243 |
+
model_name = filename[11:]
|
244 |
+
checkpoint = load_url_dist(model_urls[model_name])
|
245 |
+
elif filename.startswith('torchvision://'):
|
246 |
+
model_urls = get_torchvision_models()
|
247 |
+
model_name = filename[14:]
|
248 |
+
checkpoint = load_url_dist(model_urls[model_name])
|
249 |
+
elif filename.startswith('open-mmlab://'):
|
250 |
+
model_urls = get_external_models()
|
251 |
+
model_name = filename[13:]
|
252 |
+
deprecated_urls = get_deprecated_model_names()
|
253 |
+
if model_name in deprecated_urls:
|
254 |
+
warnings.warn(f'open-mmlab://{model_name} is deprecated in favor '
|
255 |
+
f'of open-mmlab://{deprecated_urls[model_name]}')
|
256 |
+
model_name = deprecated_urls[model_name]
|
257 |
+
model_url = model_urls[model_name]
|
258 |
+
# check if is url
|
259 |
+
if model_url.startswith(('http://', 'https://')):
|
260 |
+
checkpoint = load_url_dist(model_url)
|
261 |
+
else:
|
262 |
+
filename = osp.join(_get_mmcv_home(), model_url)
|
263 |
+
if not osp.isfile(filename):
|
264 |
+
raise IOError(f'{filename} is not a checkpoint file')
|
265 |
+
checkpoint = torch.load(filename, map_location=map_location)
|
266 |
+
elif filename.startswith('mmcls://'):
|
267 |
+
model_urls = get_mmcls_models()
|
268 |
+
model_name = filename[8:]
|
269 |
+
checkpoint = load_url_dist(model_urls[model_name])
|
270 |
+
checkpoint = _process_mmcls_checkpoint(checkpoint)
|
271 |
+
elif filename.startswith(('http://', 'https://')):
|
272 |
+
checkpoint = load_url_dist(filename)
|
273 |
+
elif filename.startswith('pavi://'):
|
274 |
+
model_path = filename[7:]
|
275 |
+
checkpoint = load_pavimodel_dist(model_path, map_location=map_location)
|
276 |
+
elif filename.startswith('s3://'):
|
277 |
+
checkpoint = load_fileclient_dist(
|
278 |
+
filename, backend='ceph', map_location=map_location)
|
279 |
+
else:
|
280 |
+
if not osp.isfile(filename):
|
281 |
+
raise IOError(f'{filename} is not a checkpoint file')
|
282 |
+
checkpoint = torch.load(filename, map_location=map_location)
|
283 |
+
return checkpoint
|
284 |
+
|
285 |
+
|
286 |
+
def load_checkpoint(model,
|
287 |
+
filename,
|
288 |
+
map_location='cpu',
|
289 |
+
strict=False,
|
290 |
+
logger=None):
|
291 |
+
"""Load checkpoint from a file or URI.
|
292 |
+
|
293 |
+
Args:
|
294 |
+
model (Module): Module to load checkpoint.
|
295 |
+
filename (str): Accept local filepath, URL, ``torchvision://xxx``,
|
296 |
+
``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
|
297 |
+
details.
|
298 |
+
map_location (str): Same as :func:`torch.load`.
|
299 |
+
strict (bool): Whether to allow different params for the model and
|
300 |
+
checkpoint.
|
301 |
+
logger (:mod:`logging.Logger` or None): The logger for error message.
|
302 |
+
|
303 |
+
Returns:
|
304 |
+
dict or OrderedDict: The loaded checkpoint.
|
305 |
+
"""
|
306 |
+
checkpoint = _load_checkpoint(filename, map_location)
|
307 |
+
# OrderedDict is a subclass of dict
|
308 |
+
if not isinstance(checkpoint, dict):
|
309 |
+
raise RuntimeError(
|
310 |
+
f'No state_dict found in checkpoint file {filename}')
|
311 |
+
# get state_dict from checkpoint
|
312 |
+
if 'state_dict' in checkpoint:
|
313 |
+
state_dict = checkpoint['state_dict']
|
314 |
+
elif 'model' in checkpoint:
|
315 |
+
state_dict = checkpoint['model']
|
316 |
+
else:
|
317 |
+
state_dict = checkpoint
|
318 |
+
# strip prefix of state_dict
|
319 |
+
if list(state_dict.keys())[0].startswith('module.'):
|
320 |
+
state_dict = {k[7:]: v for k, v in state_dict.items()}
|
321 |
+
|
322 |
+
# for MoBY, load model of online branch
|
323 |
+
if sorted(list(state_dict.keys()))[0].startswith('encoder'):
|
324 |
+
state_dict = {k.replace('encoder.', ''): v for k, v in state_dict.items() if k.startswith('encoder.')}
|
325 |
+
|
326 |
+
# reshape absolute position embedding
|
327 |
+
if state_dict.get('absolute_pos_embed') is not None:
|
328 |
+
absolute_pos_embed = state_dict['absolute_pos_embed']
|
329 |
+
N1, L, C1 = absolute_pos_embed.size()
|
330 |
+
N2, C2, H, W = model.absolute_pos_embed.size()
|
331 |
+
if N1 != N2 or C1 != C2 or L != H*W:
|
332 |
+
logger.warning("Error in loading absolute_pos_embed, pass")
|
333 |
+
else:
|
334 |
+
state_dict['absolute_pos_embed'] = absolute_pos_embed.view(N2, H, W, C2).permute(0, 3, 1, 2)
|
335 |
+
|
336 |
+
# interpolate position bias table if needed
|
337 |
+
relative_position_bias_table_keys = [k for k in state_dict.keys() if "relative_position_bias_table" in k]
|
338 |
+
for table_key in relative_position_bias_table_keys:
|
339 |
+
table_pretrained = state_dict[table_key]
|
340 |
+
table_current = model.state_dict()[table_key]
|
341 |
+
L1, nH1 = table_pretrained.size()
|
342 |
+
L2, nH2 = table_current.size()
|
343 |
+
if nH1 != nH2:
|
344 |
+
logger.warning(f"Error in loading {table_key}, pass")
|
345 |
+
else:
|
346 |
+
if L1 != L2:
|
347 |
+
S1 = int(L1 ** 0.5)
|
348 |
+
S2 = int(L2 ** 0.5)
|
349 |
+
table_pretrained_resized = F.interpolate(
|
350 |
+
table_pretrained.permute(1, 0).view(1, nH1, S1, S1),
|
351 |
+
size=(S2, S2), mode='bicubic')
|
352 |
+
state_dict[table_key] = table_pretrained_resized.view(nH2, L2).permute(1, 0)
|
353 |
+
|
354 |
+
# load state_dict
|
355 |
+
load_state_dict(model, state_dict, strict, logger)
|
356 |
+
return checkpoint
|
357 |
+
|
358 |
+
|
359 |
+
def weights_to_cpu(state_dict):
|
360 |
+
"""Copy a model state_dict to cpu.
|
361 |
+
|
362 |
+
Args:
|
363 |
+
state_dict (OrderedDict): Model weights on GPU.
|
364 |
+
|
365 |
+
Returns:
|
366 |
+
OrderedDict: Model weights on GPU.
|
367 |
+
"""
|
368 |
+
state_dict_cpu = OrderedDict()
|
369 |
+
for key, val in state_dict.items():
|
370 |
+
state_dict_cpu[key] = val.cpu()
|
371 |
+
return state_dict_cpu
|
372 |
+
|
373 |
+
|
374 |
+
def _save_to_state_dict(module, destination, prefix, keep_vars):
|
375 |
+
"""Saves module state to `destination` dictionary.
|
376 |
+
|
377 |
+
This method is modified from :meth:`torch.nn.Module._save_to_state_dict`.
|
378 |
+
|
379 |
+
Args:
|
380 |
+
module (nn.Module): The module to generate state_dict.
|
381 |
+
destination (dict): A dict where state will be stored.
|
382 |
+
prefix (str): The prefix for parameters and buffers used in this
|
383 |
+
module.
|
384 |
+
"""
|
385 |
+
for name, param in module._parameters.items():
|
386 |
+
if param is not None:
|
387 |
+
destination[prefix + name] = param if keep_vars else param.detach()
|
388 |
+
for name, buf in module._buffers.items():
|
389 |
+
# remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d
|
390 |
+
if buf is not None:
|
391 |
+
destination[prefix + name] = buf if keep_vars else buf.detach()
|
392 |
+
|
393 |
+
|
394 |
+
def get_state_dict(module, destination=None, prefix='', keep_vars=False):
|
395 |
+
"""Returns a dictionary containing a whole state of the module.
|
396 |
+
|
397 |
+
Both parameters and persistent buffers (e.g. running averages) are
|
398 |
+
included. Keys are corresponding parameter and buffer names.
|
399 |
+
|
400 |
+
This method is modified from :meth:`torch.nn.Module.state_dict` to
|
401 |
+
recursively check parallel module in case that the model has a complicated
|
402 |
+
structure, e.g., nn.Module(nn.Module(DDP)).
|
403 |
+
|
404 |
+
Args:
|
405 |
+
module (nn.Module): The module to generate state_dict.
|
406 |
+
destination (OrderedDict): Returned dict for the state of the
|
407 |
+
module.
|
408 |
+
prefix (str): Prefix of the key.
|
409 |
+
keep_vars (bool): Whether to keep the variable property of the
|
410 |
+
parameters. Default: False.
|
411 |
+
|
412 |
+
Returns:
|
413 |
+
dict: A dictionary containing a whole state of the module.
|
414 |
+
"""
|
415 |
+
# recursively check parallel module in case that the model has a
|
416 |
+
# complicated structure, e.g., nn.Module(nn.Module(DDP))
|
417 |
+
if is_module_wrapper(module):
|
418 |
+
module = module.module
|
419 |
+
|
420 |
+
# below is the same as torch.nn.Module.state_dict()
|
421 |
+
if destination is None:
|
422 |
+
destination = OrderedDict()
|
423 |
+
destination._metadata = OrderedDict()
|
424 |
+
destination._metadata[prefix[:-1]] = local_metadata = dict(
|
425 |
+
version=module._version)
|
426 |
+
_save_to_state_dict(module, destination, prefix, keep_vars)
|
427 |
+
for name, child in module._modules.items():
|
428 |
+
if child is not None:
|
429 |
+
get_state_dict(
|
430 |
+
child, destination, prefix + name + '.', keep_vars=keep_vars)
|
431 |
+
for hook in module._state_dict_hooks.values():
|
432 |
+
hook_result = hook(module, destination, prefix, local_metadata)
|
433 |
+
if hook_result is not None:
|
434 |
+
destination = hook_result
|
435 |
+
return destination
|
436 |
+
|
437 |
+
|
438 |
+
def save_checkpoint(model, filename, optimizer=None, meta=None):
|
439 |
+
"""Save checkpoint to file.
|
440 |
+
|
441 |
+
The checkpoint will have 3 fields: ``meta``, ``state_dict`` and
|
442 |
+
``optimizer``. By default ``meta`` will contain version and time info.
|
443 |
+
|
444 |
+
Args:
|
445 |
+
model (Module): Module whose params are to be saved.
|
446 |
+
filename (str): Checkpoint filename.
|
447 |
+
optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
|
448 |
+
meta (dict, optional): Metadata to be saved in checkpoint.
|
449 |
+
"""
|
450 |
+
if meta is None:
|
451 |
+
meta = {}
|
452 |
+
elif not isinstance(meta, dict):
|
453 |
+
raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
|
454 |
+
meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
|
455 |
+
|
456 |
+
if is_module_wrapper(model):
|
457 |
+
model = model.module
|
458 |
+
|
459 |
+
if hasattr(model, 'CLASSES') and model.CLASSES is not None:
|
460 |
+
# save class name to the meta
|
461 |
+
meta.update(CLASSES=model.CLASSES)
|
462 |
+
|
463 |
+
checkpoint = {
|
464 |
+
'meta': meta,
|
465 |
+
'state_dict': weights_to_cpu(get_state_dict(model))
|
466 |
+
}
|
467 |
+
# save optimizer state dict in the checkpoint
|
468 |
+
if isinstance(optimizer, Optimizer):
|
469 |
+
checkpoint['optimizer'] = optimizer.state_dict()
|
470 |
+
elif isinstance(optimizer, dict):
|
471 |
+
checkpoint['optimizer'] = {}
|
472 |
+
for name, optim in optimizer.items():
|
473 |
+
checkpoint['optimizer'][name] = optim.state_dict()
|
474 |
+
|
475 |
+
if filename.startswith('pavi://'):
|
476 |
+
try:
|
477 |
+
from pavi import modelcloud
|
478 |
+
from pavi.exception import NodeNotFoundError
|
479 |
+
except ImportError:
|
480 |
+
raise ImportError(
|
481 |
+
'Please install pavi to load checkpoint from modelcloud.')
|
482 |
+
model_path = filename[7:]
|
483 |
+
root = modelcloud.Folder()
|
484 |
+
model_dir, model_name = osp.split(model_path)
|
485 |
+
try:
|
486 |
+
model = modelcloud.get(model_dir)
|
487 |
+
except NodeNotFoundError:
|
488 |
+
model = root.create_training_model(model_dir)
|
489 |
+
with TemporaryDirectory() as tmp_dir:
|
490 |
+
checkpoint_file = osp.join(tmp_dir, model_name)
|
491 |
+
with open(checkpoint_file, 'wb') as f:
|
492 |
+
torch.save(checkpoint, f)
|
493 |
+
f.flush()
|
494 |
+
model.create_file(checkpoint_file, name=model_name)
|
495 |
+
else:
|
496 |
+
mmcv.mkdir_or_exist(osp.dirname(filename))
|
497 |
+
# immediately flush buffer
|
498 |
+
with open(filename, 'wb') as f:
|
499 |
+
torch.save(checkpoint, f)
|
500 |
+
f.flush()
|
mmcv_custom/runner/__init__.py
ADDED
@@ -0,0 +1,8 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+from .checkpoint import save_checkpoint
+from .epoch_based_runner import EpochBasedRunnerAmp
+
+
+__all__ = [
+    'EpochBasedRunnerAmp', 'save_checkpoint'
+]
mmcv_custom/runner/checkpoint.py
ADDED
@@ -0,0 +1,85 @@
1 |
+
# Copyright (c) Open-MMLab. All rights reserved.
|
2 |
+
import os.path as osp
|
3 |
+
import time
|
4 |
+
from tempfile import TemporaryDirectory
|
5 |
+
|
6 |
+
import torch
|
7 |
+
from torch.optim import Optimizer
|
8 |
+
|
9 |
+
import mmcv
|
10 |
+
from mmcv.parallel import is_module_wrapper
|
11 |
+
from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict
|
12 |
+
|
13 |
+
try:
|
14 |
+
import apex
|
15 |
+
except:
|
16 |
+
print('apex is not installed')
|
17 |
+
|
18 |
+
|
19 |
+
def save_checkpoint(model, filename, optimizer=None, meta=None):
|
20 |
+
"""Save checkpoint to file.
|
21 |
+
|
22 |
+
The checkpoint will have 4 fields: ``meta``, ``state_dict`` and
|
23 |
+
``optimizer``, ``amp``. By default ``meta`` will contain version
|
24 |
+
and time info.
|
25 |
+
|
26 |
+
Args:
|
27 |
+
model (Module): Module whose params are to be saved.
|
28 |
+
filename (str): Checkpoint filename.
|
29 |
+
optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
|
30 |
+
meta (dict, optional): Metadata to be saved in checkpoint.
|
31 |
+
"""
|
32 |
+
if meta is None:
|
33 |
+
meta = {}
|
34 |
+
elif not isinstance(meta, dict):
|
35 |
+
raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
|
36 |
+
meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
|
37 |
+
|
38 |
+
if is_module_wrapper(model):
|
39 |
+
model = model.module
|
40 |
+
|
41 |
+
if hasattr(model, 'CLASSES') and model.CLASSES is not None:
|
42 |
+
# save class name to the meta
|
43 |
+
meta.update(CLASSES=model.CLASSES)
|
44 |
+
|
45 |
+
checkpoint = {
|
46 |
+
'meta': meta,
|
47 |
+
'state_dict': weights_to_cpu(get_state_dict(model))
|
48 |
+
}
|
49 |
+
# save optimizer state dict in the checkpoint
|
50 |
+
if isinstance(optimizer, Optimizer):
|
51 |
+
checkpoint['optimizer'] = optimizer.state_dict()
|
52 |
+
elif isinstance(optimizer, dict):
|
53 |
+
checkpoint['optimizer'] = {}
|
54 |
+
for name, optim in optimizer.items():
|
55 |
+
checkpoint['optimizer'][name] = optim.state_dict()
|
56 |
+
|
57 |
+
# save amp state dict in the checkpoint
|
58 |
+
checkpoint['amp'] = apex.amp.state_dict()
|
59 |
+
|
60 |
+
if filename.startswith('pavi://'):
|
61 |
+
try:
|
62 |
+
from pavi import modelcloud
|
63 |
+
from pavi.exception import NodeNotFoundError
|
64 |
+
except ImportError:
|
65 |
+
raise ImportError(
|
66 |
+
'Please install pavi to load checkpoint from modelcloud.')
|
67 |
+
model_path = filename[7:]
|
68 |
+
root = modelcloud.Folder()
|
69 |
+
model_dir, model_name = osp.split(model_path)
|
70 |
+
try:
|
71 |
+
model = modelcloud.get(model_dir)
|
72 |
+
except NodeNotFoundError:
|
73 |
+
model = root.create_training_model(model_dir)
|
74 |
+
with TemporaryDirectory() as tmp_dir:
|
75 |
+
checkpoint_file = osp.join(tmp_dir, model_name)
|
76 |
+
with open(checkpoint_file, 'wb') as f:
|
77 |
+
torch.save(checkpoint, f)
|
78 |
+
f.flush()
|
79 |
+
model.create_file(checkpoint_file, name=model_name)
|
80 |
+
else:
|
81 |
+
mmcv.mkdir_or_exist(osp.dirname(filename))
|
82 |
+
# immediately flush buffer
|
83 |
+
with open(filename, 'wb') as f:
|
84 |
+
torch.save(checkpoint, f)
|
85 |
+
f.flush()
|
mmcv_custom/runner/epoch_based_runner.py
ADDED
@@ -0,0 +1,104 @@
1 |
+
# Copyright (c) Open-MMLab. All rights reserved.
|
2 |
+
import os.path as osp
|
3 |
+
import platform
|
4 |
+
import shutil
|
5 |
+
|
6 |
+
import torch
|
7 |
+
from torch.optim import Optimizer
|
8 |
+
|
9 |
+
import mmcv
|
10 |
+
from mmcv.runner import RUNNERS, EpochBasedRunner
|
11 |
+
from .checkpoint import save_checkpoint
|
12 |
+
|
13 |
+
try:
|
14 |
+
import apex
|
15 |
+
except:
|
16 |
+
print('apex is not installed')
|
17 |
+
|
18 |
+
|
19 |
+
@RUNNERS.register_module()
|
20 |
+
class EpochBasedRunnerAmp(EpochBasedRunner):
|
21 |
+
"""Epoch-based Runner with AMP support.
|
22 |
+
|
23 |
+
This runner train models epoch by epoch.
|
24 |
+
"""
|
25 |
+
|
26 |
+
def save_checkpoint(self,
|
27 |
+
out_dir,
|
28 |
+
filename_tmpl='epoch_{}.pth',
|
29 |
+
save_optimizer=True,
|
30 |
+
meta=None,
|
31 |
+
create_symlink=True):
|
32 |
+
"""Save the checkpoint.
|
33 |
+
|
34 |
+
Args:
|
35 |
+
out_dir (str): The directory that checkpoints are saved.
|
36 |
+
filename_tmpl (str, optional): The checkpoint filename template,
|
37 |
+
which contains a placeholder for the epoch number.
|
38 |
+
Defaults to 'epoch_{}.pth'.
|
39 |
+
save_optimizer (bool, optional): Whether to save the optimizer to
|
40 |
+
the checkpoint. Defaults to True.
|
41 |
+
meta (dict, optional): The meta information to be saved in the
|
42 |
+
checkpoint. Defaults to None.
|
43 |
+
create_symlink (bool, optional): Whether to create a symlink
|
44 |
+
"latest.pth" to point to the latest checkpoint.
|
45 |
+
Defaults to True.
|
46 |
+
"""
|
47 |
+
if meta is None:
|
48 |
+
meta = dict(epoch=self.epoch + 1, iter=self.iter)
|
49 |
+
elif isinstance(meta, dict):
|
50 |
+
meta.update(epoch=self.epoch + 1, iter=self.iter)
|
51 |
+
else:
|
52 |
+
raise TypeError(
|
53 |
+
f'meta should be a dict or None, but got {type(meta)}')
|
54 |
+
if self.meta is not None:
|
55 |
+
meta.update(self.meta)
|
56 |
+
|
57 |
+
filename = filename_tmpl.format(self.epoch + 1)
|
58 |
+
filepath = osp.join(out_dir, filename)
|
59 |
+
optimizer = self.optimizer if save_optimizer else None
|
60 |
+
save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
|
61 |
+
# in some environments, `os.symlink` is not supported, you may need to
|
62 |
+
# set `create_symlink` to False
|
63 |
+
if create_symlink:
|
64 |
+
dst_file = osp.join(out_dir, 'latest.pth')
|
65 |
+
if platform.system() != 'Windows':
|
66 |
+
mmcv.symlink(filename, dst_file)
|
67 |
+
else:
|
68 |
+
shutil.copy(filepath, dst_file)
|
69 |
+
|
70 |
+
def resume(self,
|
71 |
+
checkpoint,
|
72 |
+
resume_optimizer=True,
|
73 |
+
map_location='default'):
|
74 |
+
if map_location == 'default':
|
75 |
+
if torch.cuda.is_available():
|
76 |
+
device_id = torch.cuda.current_device()
|
77 |
+
checkpoint = self.load_checkpoint(
|
78 |
+
checkpoint,
|
79 |
+
map_location=lambda storage, loc: storage.cuda(device_id))
|
80 |
+
else:
|
81 |
+
checkpoint = self.load_checkpoint(checkpoint)
|
82 |
+
else:
|
83 |
+
checkpoint = self.load_checkpoint(
|
84 |
+
checkpoint, map_location=map_location)
|
85 |
+
|
86 |
+
self._epoch = checkpoint['meta']['epoch']
|
87 |
+
self._iter = checkpoint['meta']['iter']
|
88 |
+
if 'optimizer' in checkpoint and resume_optimizer:
|
89 |
+
if isinstance(self.optimizer, Optimizer):
|
90 |
+
self.optimizer.load_state_dict(checkpoint['optimizer'])
|
91 |
+
elif isinstance(self.optimizer, dict):
|
92 |
+
for k in self.optimizer.keys():
|
93 |
+
self.optimizer[k].load_state_dict(
|
94 |
+
checkpoint['optimizer'][k])
|
95 |
+
else:
|
96 |
+
raise TypeError(
|
97 |
+
'Optimizer should be dict or torch.optim.Optimizer '
|
98 |
+
f'but got {type(self.optimizer)}')
|
99 |
+
|
100 |
+
if 'amp' in checkpoint:
|
101 |
+
apex.amp.load_state_dict(checkpoint['amp'])
|
102 |
+
self.logger.info('load amp state dict')
|
103 |
+
|
104 |
+
self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter)
|
mmdet/__init__.py
ADDED
@@ -0,0 +1,28 @@
+import mmcv
+
+from .version import __version__, short_version
+
+
+def digit_version(version_str):
+    digit_version = []
+    for x in version_str.split('.'):
+        if x.isdigit():
+            digit_version.append(int(x))
+        elif x.find('rc') != -1:
+            patch_version = x.split('rc')
+            digit_version.append(int(patch_version[0]) - 1)
+            digit_version.append(int(patch_version[1]))
+    return digit_version
+
+
+mmcv_minimum_version = '1.2.4'
+mmcv_maximum_version = '1.4.0'
+mmcv_version = digit_version(mmcv.__version__)
+
+
+assert (mmcv_version >= digit_version(mmcv_minimum_version)
+        and mmcv_version <= digit_version(mmcv_maximum_version)), \
+    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
+    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
+
+__all__ = ['__version__', 'short_version']
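`digit_version` turns a version string into a list of integers so the assert above can compare versions lexicographically, with release candidates folded below the corresponding final release. A small illustration of the values it produces:

    from mmdet import digit_version  # assumes mmcv is installed so the package imports cleanly

    print(digit_version('1.2.4'))     # [1, 2, 4]
    print(digit_version('1.4.0'))     # [1, 4, 0]
    print(digit_version('1.3.0rc1'))  # [1, 3, -1, 1] -- rc builds sort below 1.3.0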
mmdet/apis/__init__.py
ADDED
@@ -0,0 +1,10 @@
+from .inference import (async_inference_detector, inference_detector,
+                        init_detector, show_result_pyplot)
+from .test import multi_gpu_test, single_gpu_test
+from .train import get_root_logger, set_random_seed, train_detector
+
+__all__ = [
+    'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',
+    'async_inference_detector', 'inference_detector', 'show_result_pyplot',
+    'multi_gpu_test', 'single_gpu_test'
+]
mmdet/apis/inference.py
ADDED
@@ -0,0 +1,217 @@
import warnings

import mmcv
import numpy as np
import torch
from mmcv.ops import RoIPool
from mmcv.parallel import collate, scatter
from mmcv.runner import load_checkpoint

from mmdet.core import get_classes
from mmdet.datasets import replace_ImageToTensor
from mmdet.datasets.pipelines import Compose
from mmdet.models import build_detector


def init_detector(config, checkpoint=None, device='cuda:0', cfg_options=None):
    """Initialize a detector from config file.

    Args:
        config (str or :obj:`mmcv.Config`): Config file path or the config
            object.
        checkpoint (str, optional): Checkpoint path. If left as None, the model
            will not load any weights.
        cfg_options (dict): Options to override some settings in the used
            config.

    Returns:
        nn.Module: The constructed detector.
    """
    if isinstance(config, str):
        config = mmcv.Config.fromfile(config)
    elif not isinstance(config, mmcv.Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    if cfg_options is not None:
        config.merge_from_dict(cfg_options)
    config.model.pretrained = None
    config.model.train_cfg = None
    model = build_detector(config.model, test_cfg=config.get('test_cfg'))
    if checkpoint is not None:
        map_loc = 'cpu' if device == 'cpu' else None
        checkpoint = load_checkpoint(model, checkpoint, map_location=map_loc)
        if 'CLASSES' in checkpoint.get('meta', {}):
            model.CLASSES = checkpoint['meta']['CLASSES']
        else:
            warnings.simplefilter('once')
            warnings.warn('Class names are not saved in the checkpoint\'s '
                          'meta data, use COCO classes by default.')
            model.CLASSES = get_classes('coco')
    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model


class LoadImage(object):
    """Deprecated.

    A simple pipeline to load image.
    """

    def __call__(self, results):
        """Call function to load images into results.

        Args:
            results (dict): A result dict contains the file name
                of the image to be read.
        Returns:
            dict: ``results`` will be returned containing loaded image.
        """
        warnings.simplefilter('once')
        warnings.warn('`LoadImage` is deprecated and will be removed in '
                      'future releases. You may use `LoadImageFromWebcam` '
                      'from `mmdet.datasets.pipelines.` instead.')
        if isinstance(results['img'], str):
            results['filename'] = results['img']
            results['ori_filename'] = results['img']
        else:
            results['filename'] = None
            results['ori_filename'] = None
        img = mmcv.imread(results['img'])
        results['img'] = img
        results['img_fields'] = ['img']
        results['img_shape'] = img.shape
        results['ori_shape'] = img.shape
        return results


def inference_detector(model, imgs):
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str/ndarray or list[str/ndarray] or tuple[str/ndarray]):
            Either image files or loaded images.

    Returns:
        If imgs is a list or tuple, the same length list type results
        will be returned, otherwise return the detection results directly.
    """

    if isinstance(imgs, (list, tuple)):
        is_batch = True
    else:
        imgs = [imgs]
        is_batch = False

    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    if isinstance(imgs[0], np.ndarray):
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'

    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    datas = []
    for img in imgs:
        # prepare data
        if isinstance(img, np.ndarray):
            # directly add img
            data = dict(img=img)
        else:
            # add information into dict
            data = dict(img_info=dict(filename=img), img_prefix=None)
        # build the data pipeline
        data = test_pipeline(data)
        datas.append(data)

    data = collate(datas, samples_per_gpu=len(imgs))
    # just get the actual data from DataContainer
    data['img_metas'] = [img_metas.data[0] for img_metas in data['img_metas']]
    data['img'] = [img.data[0] for img in data['img']]
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'

    # forward the model
    with torch.no_grad():
        results = model(return_loss=False, rescale=True, **data)

    if not is_batch:
        return results[0]
    else:
        return results


async def async_inference_detector(model, img):
    """Async inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        img (str | ndarray): Either image files or loaded images.

    Returns:
        Awaitable detection results.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # prepare data
    if isinstance(img, np.ndarray):
        # directly add img
        data = dict(img=img)
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
    else:
        # add information into dict
        data = dict(img_info=dict(filename=img), img_prefix=None)
    # build the data pipeline
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = test_pipeline(data)
    data = scatter(collate([data], samples_per_gpu=1), [device])[0]

    # We don't restore `torch.is_grad_enabled()` value during concurrent
    # inference since execution can overlap
    torch.set_grad_enabled(False)
    result = await model.aforward_test(rescale=True, **data)
    return result


def show_result_pyplot(model,
                       img,
                       result,
                       score_thr=0.3,
                       title='result',
                       wait_time=0):
    """Visualize the detection results on the image.

    Args:
        model (nn.Module): The loaded detector.
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The detection result, can be either
            (bbox, segm) or just bbox.
        score_thr (float): The threshold to visualize the bboxes and masks.
        title (str): Title of the pyplot figure.
        wait_time (float): Value of waitKey param.
            Default: 0.
    """
    if hasattr(model, 'module'):
        model = model.module
    model.show_result(
        img,
        result,
        score_thr=score_thr,
        show=True,
        wait_time=wait_time,
        win_name=title,
        bbox_color=(72, 101, 241),
        text_color=(72, 101, 241))
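Taken together, this inference API is typically driven as follows. This is a minimal sketch, assuming a config from this repo and a locally available checkpoint (the checkpoint path below is a placeholder, not a file shipped with the Space):

    from mmdet.apis import inference_detector, init_detector, show_result_pyplot

    config_file = 'configs/walt/walt_vehicle.py'       # shipped in this repo
    checkpoint_file = 'data/walt_vehicle.pth'          # placeholder path

    model = init_detector(config_file, checkpoint_file, device='cuda:0')
    result = inference_detector(model, 'demo.jpg')     # single image -> single result
    show_result_pyplot(model, 'demo.jpg', result, score_thr=0.3)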
mmdet/apis/test.py
ADDED
@@ -0,0 +1,189 @@
import os.path as osp
import pickle
import shutil
import tempfile
import time

import mmcv
import torch
import torch.distributed as dist
from mmcv.image import tensor2imgs
from mmcv.runner import get_dist_info

from mmdet.core import encode_mask_results


def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None
                model.module.show_result(
                    img_show,
                    result[i],
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results


def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Test model with multiple gpus.

    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
    it encodes results to gpu tensors and use gpu communication for results
    collection. On cpu mode it saves the results on different gpus to 'tmpdir'
    and collects them by the rank 0 worker.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
            # encode mask results
            if isinstance(result[0], tuple):
                result = [(bbox_results, encode_mask_results(mask_results))
                          for bbox_results, mask_results in result]
        results.extend(result)

        if rank == 0:
            batch_size = len(result)
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        results = collect_results_gpu(results, len(dataset))
    else:
        results = collect_results_cpu(results, len(dataset), tmpdir)
    return results


def collect_results_cpu(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mmcv.mkdir_or_exist('.dist_test')
            tmpdir = tempfile.mkdtemp(dir='.dist_test')
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f'part_{i}.pkl')
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results


def collect_results_gpu(result_part, size):
    rank, world_size = get_dist_info()
    # dump result part to tensor with pickle
    part_tensor = torch.tensor(
        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
    # gather all result part tensor shape
    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
    shape_list = [shape_tensor.clone() for _ in range(world_size)]
    dist.all_gather(shape_list, shape_tensor)
    # padding result part tensor to max length
    shape_max = torch.tensor(shape_list).max()
    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
    part_send[:shape_tensor[0]] = part_tensor
    part_recv_list = [
        part_tensor.new_zeros(shape_max) for _ in range(world_size)
    ]
    # gather all result part
    dist.all_gather(part_recv_list, part_send)

    if rank == 0:
        part_list = []
        for recv, shape in zip(part_recv_list, shape_list):
            part_list.append(
                pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        return ordered_results
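single_gpu_test expects the detector to already be wrapped in MMDataParallel and the loader to come from build_dataloader in test mode. A rough sketch under those assumptions (cfg is an already-loaded mmcv.Config, model an already-built detector; both are assumed here, not shown):

    from mmcv.parallel import MMDataParallel
    from mmdet.apis import single_gpu_test
    from mmdet.datasets import build_dataloader, build_dataset

    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, samples_per_gpu=1,
                                   workers_per_gpu=2, dist=False, shuffle=False)
    outputs = single_gpu_test(MMDataParallel(model, device_ids=[0]),
                              data_loader, show=False, out_dir=None)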
mmdet/apis/train.py
ADDED
@@ -0,0 +1,185 @@
import random
import warnings

import numpy as np
import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
                         Fp16OptimizerHook, OptimizerHook, build_optimizer,
                         build_runner)
from mmcv.utils import build_from_cfg

from mmdet.core import DistEvalHook, EvalHook
from mmdet.datasets import (build_dataloader, build_dataset,
                            replace_ImageToTensor)
from mmdet.utils import get_root_logger
from mmcv_custom.runner import EpochBasedRunnerAmp
try:
    import apex
except:
    print('apex is not installed')


def set_random_seed(seed, deterministic=False):
    """Set random seed.

    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: False.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiments')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiments')
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # build optimizer
    optimizer = build_optimizer(model, cfg.optimizer)

    # use apex fp16 optimizer
    if cfg.optimizer_config.get("type", None) and cfg.optimizer_config["type"] == "DistOptimizerHook":
        if cfg.optimizer_config.get("use_fp16", False):
            model, optimizer = apex.amp.initialize(
                model.cuda(), optimizer, opt_level="O1")
            for m in model.modules():
                if hasattr(m, "fp16_enabled"):
                    m.fp16_enabled = True

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    else:
        if 'total_epochs' in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs

    # build runner
    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta))

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline)
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
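A typical call sequence into train_detector looks roughly as follows. This is a sketch, assuming cfg is an mmcv.Config carrying the usual MMDetection fields (gpu_ids, seed, work_dir, lr_config, checkpoint_config, log_config, and so on); it is not a training recipe shipped with this Space:

    from mmcv import Config
    from mmdet.apis import set_random_seed, train_detector
    from mmdet.datasets import build_dataset
    from mmdet.models import build_detector

    cfg = Config.fromfile('configs/walt/walt_people.py')  # config shipped in this repo
    cfg.gpu_ids = [0]                                     # assumed, set by the launcher normally
    cfg.seed = 0
    set_random_seed(cfg.seed, deterministic=False)

    datasets = [build_dataset(cfg.data.train)]
    model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'),
                           test_cfg=cfg.get('test_cfg'))
    train_detector(model, datasets, cfg, distributed=False, validate=True)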
mmdet/core/__init__.py
ADDED
@@ -0,0 +1,7 @@
from .anchor import *  # noqa: F401, F403
from .bbox import *  # noqa: F401, F403
from .evaluation import *  # noqa: F401, F403
from .export import *  # noqa: F401, F403
from .mask import *  # noqa: F401, F403
from .post_processing import *  # noqa: F401, F403
from .utils import *  # noqa: F401, F403
mmdet/core/anchor/__init__.py
ADDED
@@ -0,0 +1,11 @@
from .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator,
                               YOLOAnchorGenerator)
from .builder import ANCHOR_GENERATORS, build_anchor_generator
from .point_generator import PointGenerator
from .utils import anchor_inside_flags, calc_region, images_to_levels

__all__ = [
    'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags',
    'PointGenerator', 'images_to_levels', 'calc_region',
    'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator'
]
mmdet/core/anchor/anchor_generator.py
ADDED
@@ -0,0 +1,727 @@
1 |
+
import mmcv
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
from torch.nn.modules.utils import _pair
|
5 |
+
|
6 |
+
from .builder import ANCHOR_GENERATORS
|
7 |
+
|
8 |
+
|
9 |
+
@ANCHOR_GENERATORS.register_module()
|
10 |
+
class AnchorGenerator(object):
|
11 |
+
"""Standard anchor generator for 2D anchor-based detectors.
|
12 |
+
|
13 |
+
Args:
|
14 |
+
strides (list[int] | list[tuple[int, int]]): Strides of anchors
|
15 |
+
in multiple feature levels in order (w, h).
|
16 |
+
ratios (list[float]): The list of ratios between the height and width
|
17 |
+
of anchors in a single level.
|
18 |
+
scales (list[int] | None): Anchor scales for anchors in a single level.
|
19 |
+
It cannot be set at the same time if `octave_base_scale` and
|
20 |
+
`scales_per_octave` are set.
|
21 |
+
base_sizes (list[int] | None): The basic sizes
|
22 |
+
of anchors in multiple levels.
|
23 |
+
If None is given, strides will be used as base_sizes.
|
24 |
+
(If strides are non square, the shortest stride is taken.)
|
25 |
+
scale_major (bool): Whether to multiply scales first when generating
|
26 |
+
base anchors. If true, the anchors in the same row will have the
|
27 |
+
same scales. By default it is True in V2.0
|
28 |
+
octave_base_scale (int): The base scale of octave.
|
29 |
+
scales_per_octave (int): Number of scales for each octave.
|
30 |
+
`octave_base_scale` and `scales_per_octave` are usually used in
|
31 |
+
retinanet and the `scales` should be None when they are set.
|
32 |
+
centers (list[tuple[float, float]] | None): The centers of the anchor
|
33 |
+
relative to the feature grid center in multiple feature levels.
|
34 |
+
By default it is set to be None and not used. If a list of tuple of
|
35 |
+
float is given, they will be used to shift the centers of anchors.
|
36 |
+
center_offset (float): The offset of center in proportion to anchors'
|
37 |
+
width and height. By default it is 0 in V2.0.
|
38 |
+
|
39 |
+
Examples:
|
40 |
+
>>> from mmdet.core import AnchorGenerator
|
41 |
+
>>> self = AnchorGenerator([16], [1.], [1.], [9])
|
42 |
+
>>> all_anchors = self.grid_anchors([(2, 2)], device='cpu')
|
43 |
+
>>> print(all_anchors)
|
44 |
+
[tensor([[-4.5000, -4.5000, 4.5000, 4.5000],
|
45 |
+
[11.5000, -4.5000, 20.5000, 4.5000],
|
46 |
+
[-4.5000, 11.5000, 4.5000, 20.5000],
|
47 |
+
[11.5000, 11.5000, 20.5000, 20.5000]])]
|
48 |
+
>>> self = AnchorGenerator([16, 32], [1.], [1.], [9, 18])
|
49 |
+
>>> all_anchors = self.grid_anchors([(2, 2), (1, 1)], device='cpu')
|
50 |
+
>>> print(all_anchors)
|
51 |
+
[tensor([[-4.5000, -4.5000, 4.5000, 4.5000],
|
52 |
+
[11.5000, -4.5000, 20.5000, 4.5000],
|
53 |
+
[-4.5000, 11.5000, 4.5000, 20.5000],
|
54 |
+
[11.5000, 11.5000, 20.5000, 20.5000]]), \
|
55 |
+
tensor([[-9., -9., 9., 9.]])]
|
56 |
+
"""
|
57 |
+
|
58 |
+
def __init__(self,
|
59 |
+
strides,
|
60 |
+
ratios,
|
61 |
+
scales=None,
|
62 |
+
base_sizes=None,
|
63 |
+
scale_major=True,
|
64 |
+
octave_base_scale=None,
|
65 |
+
scales_per_octave=None,
|
66 |
+
centers=None,
|
67 |
+
center_offset=0.):
|
68 |
+
# check center and center_offset
|
69 |
+
if center_offset != 0:
|
70 |
+
assert centers is None, 'center cannot be set when center_offset' \
|
71 |
+
f'!=0, {centers} is given.'
|
72 |
+
if not (0 <= center_offset <= 1):
|
73 |
+
raise ValueError('center_offset should be in range [0, 1], '
|
74 |
+
f'{center_offset} is given.')
|
75 |
+
if centers is not None:
|
76 |
+
assert len(centers) == len(strides), \
|
77 |
+
'The number of strides should be the same as centers, got ' \
|
78 |
+
f'{strides} and {centers}'
|
79 |
+
|
80 |
+
# calculate base sizes of anchors
|
81 |
+
self.strides = [_pair(stride) for stride in strides]
|
82 |
+
self.base_sizes = [min(stride) for stride in self.strides
|
83 |
+
] if base_sizes is None else base_sizes
|
84 |
+
assert len(self.base_sizes) == len(self.strides), \
|
85 |
+
'The number of strides should be the same as base sizes, got ' \
|
86 |
+
f'{self.strides} and {self.base_sizes}'
|
87 |
+
|
88 |
+
# calculate scales of anchors
|
89 |
+
assert ((octave_base_scale is not None
|
90 |
+
and scales_per_octave is not None) ^ (scales is not None)), \
|
91 |
+
'scales and octave_base_scale with scales_per_octave cannot' \
|
92 |
+
' be set at the same time'
|
93 |
+
if scales is not None:
|
94 |
+
self.scales = torch.Tensor(scales)
|
95 |
+
elif octave_base_scale is not None and scales_per_octave is not None:
|
96 |
+
octave_scales = np.array(
|
97 |
+
[2**(i / scales_per_octave) for i in range(scales_per_octave)])
|
98 |
+
scales = octave_scales * octave_base_scale
|
99 |
+
self.scales = torch.Tensor(scales)
|
100 |
+
else:
|
101 |
+
raise ValueError('Either scales or octave_base_scale with '
|
102 |
+
'scales_per_octave should be set')
|
103 |
+
|
104 |
+
self.octave_base_scale = octave_base_scale
|
105 |
+
self.scales_per_octave = scales_per_octave
|
106 |
+
self.ratios = torch.Tensor(ratios)
|
107 |
+
self.scale_major = scale_major
|
108 |
+
self.centers = centers
|
109 |
+
self.center_offset = center_offset
|
110 |
+
self.base_anchors = self.gen_base_anchors()
|
111 |
+
|
112 |
+
@property
|
113 |
+
def num_base_anchors(self):
|
114 |
+
"""list[int]: total number of base anchors in a feature grid"""
|
115 |
+
return [base_anchors.size(0) for base_anchors in self.base_anchors]
|
116 |
+
|
117 |
+
@property
|
118 |
+
def num_levels(self):
|
119 |
+
"""int: number of feature levels that the generator will be applied"""
|
120 |
+
return len(self.strides)
|
121 |
+
|
122 |
+
def gen_base_anchors(self):
|
123 |
+
"""Generate base anchors.
|
124 |
+
|
125 |
+
Returns:
|
126 |
+
list(torch.Tensor): Base anchors of a feature grid in multiple \
|
127 |
+
feature levels.
|
128 |
+
"""
|
129 |
+
multi_level_base_anchors = []
|
130 |
+
for i, base_size in enumerate(self.base_sizes):
|
131 |
+
center = None
|
132 |
+
if self.centers is not None:
|
133 |
+
center = self.centers[i]
|
134 |
+
multi_level_base_anchors.append(
|
135 |
+
self.gen_single_level_base_anchors(
|
136 |
+
base_size,
|
137 |
+
scales=self.scales,
|
138 |
+
ratios=self.ratios,
|
139 |
+
center=center))
|
140 |
+
return multi_level_base_anchors
|
141 |
+
|
142 |
+
def gen_single_level_base_anchors(self,
|
143 |
+
base_size,
|
144 |
+
scales,
|
145 |
+
ratios,
|
146 |
+
center=None):
|
147 |
+
"""Generate base anchors of a single level.
|
148 |
+
|
149 |
+
Args:
|
150 |
+
base_size (int | float): Basic size of an anchor.
|
151 |
+
scales (torch.Tensor): Scales of the anchor.
|
152 |
+
ratios (torch.Tensor): The ratio between between the height
|
153 |
+
and width of anchors in a single level.
|
154 |
+
center (tuple[float], optional): The center of the base anchor
|
155 |
+
related to a single feature grid. Defaults to None.
|
156 |
+
|
157 |
+
Returns:
|
158 |
+
torch.Tensor: Anchors in a single-level feature maps.
|
159 |
+
"""
|
160 |
+
w = base_size
|
161 |
+
h = base_size
|
162 |
+
if center is None:
|
163 |
+
x_center = self.center_offset * w
|
164 |
+
y_center = self.center_offset * h
|
165 |
+
else:
|
166 |
+
x_center, y_center = center
|
167 |
+
|
168 |
+
h_ratios = torch.sqrt(ratios)
|
169 |
+
w_ratios = 1 / h_ratios
|
170 |
+
if self.scale_major:
|
171 |
+
ws = (w * w_ratios[:, None] * scales[None, :]).view(-1)
|
172 |
+
hs = (h * h_ratios[:, None] * scales[None, :]).view(-1)
|
173 |
+
else:
|
174 |
+
ws = (w * scales[:, None] * w_ratios[None, :]).view(-1)
|
175 |
+
hs = (h * scales[:, None] * h_ratios[None, :]).view(-1)
|
176 |
+
|
177 |
+
# use float anchor and the anchor's center is aligned with the
|
178 |
+
# pixel center
|
179 |
+
base_anchors = [
|
180 |
+
x_center - 0.5 * ws, y_center - 0.5 * hs, x_center + 0.5 * ws,
|
181 |
+
y_center + 0.5 * hs
|
182 |
+
]
|
183 |
+
base_anchors = torch.stack(base_anchors, dim=-1)
|
184 |
+
|
185 |
+
return base_anchors
|
186 |
+
|
187 |
+
def _meshgrid(self, x, y, row_major=True):
|
188 |
+
"""Generate mesh grid of x and y.
|
189 |
+
|
190 |
+
Args:
|
191 |
+
x (torch.Tensor): Grids of x dimension.
|
192 |
+
y (torch.Tensor): Grids of y dimension.
|
193 |
+
row_major (bool, optional): Whether to return y grids first.
|
194 |
+
Defaults to True.
|
195 |
+
|
196 |
+
Returns:
|
197 |
+
tuple[torch.Tensor]: The mesh grids of x and y.
|
198 |
+
"""
|
199 |
+
# use shape instead of len to keep tracing while exporting to onnx
|
200 |
+
xx = x.repeat(y.shape[0])
|
201 |
+
yy = y.view(-1, 1).repeat(1, x.shape[0]).view(-1)
|
202 |
+
if row_major:
|
203 |
+
return xx, yy
|
204 |
+
else:
|
205 |
+
return yy, xx
|
206 |
+
|
207 |
+
def grid_anchors(self, featmap_sizes, device='cuda'):
|
208 |
+
"""Generate grid anchors in multiple feature levels.
|
209 |
+
|
210 |
+
Args:
|
211 |
+
featmap_sizes (list[tuple]): List of feature map sizes in
|
212 |
+
multiple feature levels.
|
213 |
+
device (str): Device where the anchors will be put on.
|
214 |
+
|
215 |
+
Return:
|
216 |
+
list[torch.Tensor]: Anchors in multiple feature levels. \
|
217 |
+
The sizes of each tensor should be [N, 4], where \
|
218 |
+
N = width * height * num_base_anchors, width and height \
|
219 |
+
are the sizes of the corresponding feature level, \
|
220 |
+
num_base_anchors is the number of anchors for that level.
|
221 |
+
"""
|
222 |
+
assert self.num_levels == len(featmap_sizes)
|
223 |
+
multi_level_anchors = []
|
224 |
+
for i in range(self.num_levels):
|
225 |
+
anchors = self.single_level_grid_anchors(
|
226 |
+
self.base_anchors[i].to(device),
|
227 |
+
featmap_sizes[i],
|
228 |
+
self.strides[i],
|
229 |
+
device=device)
|
230 |
+
multi_level_anchors.append(anchors)
|
231 |
+
return multi_level_anchors
|
232 |
+
|
233 |
+
def single_level_grid_anchors(self,
|
234 |
+
base_anchors,
|
235 |
+
featmap_size,
|
236 |
+
stride=(16, 16),
|
237 |
+
device='cuda'):
|
238 |
+
"""Generate grid anchors of a single level.
|
239 |
+
|
240 |
+
Note:
|
241 |
+
This function is usually called by method ``self.grid_anchors``.
|
242 |
+
|
243 |
+
Args:
|
244 |
+
base_anchors (torch.Tensor): The base anchors of a feature grid.
|
245 |
+
featmap_size (tuple[int]): Size of the feature maps.
|
246 |
+
stride (tuple[int], optional): Stride of the feature map in order
|
247 |
+
(w, h). Defaults to (16, 16).
|
248 |
+
device (str, optional): Device the tensor will be put on.
|
249 |
+
Defaults to 'cuda'.
|
250 |
+
|
251 |
+
Returns:
|
252 |
+
torch.Tensor: Anchors in the overall feature maps.
|
253 |
+
"""
|
254 |
+
# keep as Tensor, so that we can covert to ONNX correctly
|
255 |
+
feat_h, feat_w = featmap_size
|
256 |
+
shift_x = torch.arange(0, feat_w, device=device) * stride[0]
|
257 |
+
shift_y = torch.arange(0, feat_h, device=device) * stride[1]
|
258 |
+
|
259 |
+
shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
|
260 |
+
shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
|
261 |
+
shifts = shifts.type_as(base_anchors)
|
262 |
+
# first feat_w elements correspond to the first row of shifts
|
263 |
+
# add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
|
264 |
+
# shifted anchors (K, A, 4), reshape to (K*A, 4)
|
265 |
+
|
266 |
+
all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
|
267 |
+
all_anchors = all_anchors.view(-1, 4)
|
268 |
+
# first A rows correspond to A anchors of (0, 0) in feature map,
|
269 |
+
# then (0, 1), (0, 2), ...
|
270 |
+
return all_anchors
|
271 |
+
|
272 |
+
def valid_flags(self, featmap_sizes, pad_shape, device='cuda'):
|
273 |
+
"""Generate valid flags of anchors in multiple feature levels.
|
274 |
+
|
275 |
+
Args:
|
276 |
+
featmap_sizes (list(tuple)): List of feature map sizes in
|
277 |
+
multiple feature levels.
|
278 |
+
pad_shape (tuple): The padded shape of the image.
|
279 |
+
device (str): Device where the anchors will be put on.
|
280 |
+
|
281 |
+
Return:
|
282 |
+
list(torch.Tensor): Valid flags of anchors in multiple levels.
|
283 |
+
"""
|
284 |
+
assert self.num_levels == len(featmap_sizes)
|
285 |
+
multi_level_flags = []
|
286 |
+
for i in range(self.num_levels):
|
287 |
+
anchor_stride = self.strides[i]
|
288 |
+
feat_h, feat_w = featmap_sizes[i]
|
289 |
+
h, w = pad_shape[:2]
|
290 |
+
valid_feat_h = min(int(np.ceil(h / anchor_stride[1])), feat_h)
|
291 |
+
valid_feat_w = min(int(np.ceil(w / anchor_stride[0])), feat_w)
|
292 |
+
flags = self.single_level_valid_flags((feat_h, feat_w),
|
293 |
+
(valid_feat_h, valid_feat_w),
|
294 |
+
self.num_base_anchors[i],
|
295 |
+
device=device)
|
296 |
+
multi_level_flags.append(flags)
|
297 |
+
return multi_level_flags
|
298 |
+
|
299 |
+
def single_level_valid_flags(self,
|
300 |
+
featmap_size,
|
301 |
+
valid_size,
|
302 |
+
num_base_anchors,
|
303 |
+
device='cuda'):
|
304 |
+
"""Generate the valid flags of anchor in a single feature map.
|
305 |
+
|
306 |
+
Args:
|
307 |
+
featmap_size (tuple[int]): The size of feature maps.
|
308 |
+
valid_size (tuple[int]): The valid size of the feature maps.
|
309 |
+
num_base_anchors (int): The number of base anchors.
|
310 |
+
device (str, optional): Device where the flags will be put on.
|
311 |
+
Defaults to 'cuda'.
|
312 |
+
|
313 |
+
Returns:
|
314 |
+
torch.Tensor: The valid flags of each anchor in a single level \
|
315 |
+
feature map.
|
316 |
+
"""
|
317 |
+
feat_h, feat_w = featmap_size
|
318 |
+
valid_h, valid_w = valid_size
|
319 |
+
assert valid_h <= feat_h and valid_w <= feat_w
|
320 |
+
valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
|
321 |
+
valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
|
322 |
+
valid_x[:valid_w] = 1
|
323 |
+
valid_y[:valid_h] = 1
|
324 |
+
valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
|
325 |
+
valid = valid_xx & valid_yy
|
326 |
+
valid = valid[:, None].expand(valid.size(0),
|
327 |
+
num_base_anchors).contiguous().view(-1)
|
328 |
+
return valid
|
329 |
+
|
330 |
+
def __repr__(self):
|
331 |
+
"""str: a string that describes the module"""
|
332 |
+
indent_str = ' '
|
333 |
+
repr_str = self.__class__.__name__ + '(\n'
|
334 |
+
repr_str += f'{indent_str}strides={self.strides},\n'
|
335 |
+
repr_str += f'{indent_str}ratios={self.ratios},\n'
|
336 |
+
repr_str += f'{indent_str}scales={self.scales},\n'
|
337 |
+
repr_str += f'{indent_str}base_sizes={self.base_sizes},\n'
|
338 |
+
repr_str += f'{indent_str}scale_major={self.scale_major},\n'
|
339 |
+
repr_str += f'{indent_str}octave_base_scale='
|
340 |
+
repr_str += f'{self.octave_base_scale},\n'
|
341 |
+
repr_str += f'{indent_str}scales_per_octave='
|
342 |
+
repr_str += f'{self.scales_per_octave},\n'
|
343 |
+
repr_str += f'{indent_str}num_levels={self.num_levels}\n'
|
344 |
+
repr_str += f'{indent_str}centers={self.centers},\n'
|
345 |
+
repr_str += f'{indent_str}center_offset={self.center_offset})'
|
346 |
+
return repr_str
|
347 |
+
|
348 |
+
|
349 |
+
@ANCHOR_GENERATORS.register_module()
|
350 |
+
class SSDAnchorGenerator(AnchorGenerator):
|
351 |
+
"""Anchor generator for SSD.
|
352 |
+
|
353 |
+
Args:
|
354 |
+
strides (list[int] | list[tuple[int, int]]): Strides of anchors
|
355 |
+
in multiple feature levels.
|
356 |
+
ratios (list[float]): The list of ratios between the height and width
|
357 |
+
of anchors in a single level.
|
358 |
+
basesize_ratio_range (tuple(float)): Ratio range of anchors.
|
359 |
+
input_size (int): Size of feature map, 300 for SSD300,
|
360 |
+
512 for SSD512.
|
361 |
+
scale_major (bool): Whether to multiply scales first when generating
|
362 |
+
base anchors. If true, the anchors in the same row will have the
|
363 |
+
same scales. It is always set to be False in SSD.
|
364 |
+
"""
|
365 |
+
|
366 |
+
def __init__(self,
|
367 |
+
strides,
|
368 |
+
ratios,
|
369 |
+
basesize_ratio_range,
|
370 |
+
input_size=300,
|
371 |
+
scale_major=True):
|
372 |
+
assert len(strides) == len(ratios)
|
373 |
+
assert mmcv.is_tuple_of(basesize_ratio_range, float)
|
374 |
+
|
375 |
+
self.strides = [_pair(stride) for stride in strides]
|
376 |
+
self.input_size = input_size
|
377 |
+
self.centers = [(stride[0] / 2., stride[1] / 2.)
|
378 |
+
for stride in self.strides]
|
379 |
+
self.basesize_ratio_range = basesize_ratio_range
|
380 |
+
|
381 |
+
# calculate anchor ratios and sizes
|
382 |
+
min_ratio, max_ratio = basesize_ratio_range
|
383 |
+
min_ratio = int(min_ratio * 100)
|
384 |
+
max_ratio = int(max_ratio * 100)
|
385 |
+
step = int(np.floor(max_ratio - min_ratio) / (self.num_levels - 2))
|
386 |
+
min_sizes = []
|
387 |
+
max_sizes = []
|
388 |
+
for ratio in range(int(min_ratio), int(max_ratio) + 1, step):
|
389 |
+
min_sizes.append(int(self.input_size * ratio / 100))
|
390 |
+
max_sizes.append(int(self.input_size * (ratio + step) / 100))
|
391 |
+
if self.input_size == 300:
|
392 |
+
if basesize_ratio_range[0] == 0.15: # SSD300 COCO
|
393 |
+
min_sizes.insert(0, int(self.input_size * 7 / 100))
|
394 |
+
max_sizes.insert(0, int(self.input_size * 15 / 100))
|
395 |
+
elif basesize_ratio_range[0] == 0.2: # SSD300 VOC
|
396 |
+
min_sizes.insert(0, int(self.input_size * 10 / 100))
|
397 |
+
max_sizes.insert(0, int(self.input_size * 20 / 100))
|
398 |
+
else:
|
399 |
+
raise ValueError(
|
400 |
+
'basesize_ratio_range[0] should be either 0.15'
|
401 |
+
'or 0.2 when input_size is 300, got '
|
402 |
+
f'{basesize_ratio_range[0]}.')
|
403 |
+
elif self.input_size == 512:
|
404 |
+
if basesize_ratio_range[0] == 0.1: # SSD512 COCO
|
405 |
+
min_sizes.insert(0, int(self.input_size * 4 / 100))
|
406 |
+
max_sizes.insert(0, int(self.input_size * 10 / 100))
|
407 |
+
elif basesize_ratio_range[0] == 0.15: # SSD512 VOC
|
408 |
+
min_sizes.insert(0, int(self.input_size * 7 / 100))
|
409 |
+
max_sizes.insert(0, int(self.input_size * 15 / 100))
|
410 |
+
else:
|
411 |
+
raise ValueError('basesize_ratio_range[0] should be either 0.1'
|
412 |
+
'or 0.15 when input_size is 512, got'
|
413 |
+
f' {basesize_ratio_range[0]}.')
|
414 |
+
else:
|
415 |
+
raise ValueError('Only support 300 or 512 in SSDAnchorGenerator'
|
416 |
+
f', got {self.input_size}.')
|
417 |
+
|
418 |
+
anchor_ratios = []
|
419 |
+
anchor_scales = []
|
420 |
+
for k in range(len(self.strides)):
|
421 |
+
scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
|
422 |
+
anchor_ratio = [1.]
|
423 |
+
for r in ratios[k]:
|
424 |
+
anchor_ratio += [1 / r, r] # 4 or 6 ratio
|
425 |
+
anchor_ratios.append(torch.Tensor(anchor_ratio))
|
426 |
+
anchor_scales.append(torch.Tensor(scales))
|
427 |
+
|
428 |
+
self.base_sizes = min_sizes
|
429 |
+
self.scales = anchor_scales
|
430 |
+
self.ratios = anchor_ratios
|
431 |
+
self.scale_major = scale_major
|
432 |
+
self.center_offset = 0
|
433 |
+
self.base_anchors = self.gen_base_anchors()
|
434 |
+
|
435 |
+
def gen_base_anchors(self):
|
436 |
+
"""Generate base anchors.
|
437 |
+
|
438 |
+
Returns:
|
439 |
+
list(torch.Tensor): Base anchors of a feature grid in multiple \
|
440 |
+
feature levels.
|
441 |
+
"""
|
442 |
+
multi_level_base_anchors = []
|
443 |
+
for i, base_size in enumerate(self.base_sizes):
|
444 |
+
base_anchors = self.gen_single_level_base_anchors(
|
445 |
+
base_size,
|
446 |
+
scales=self.scales[i],
|
447 |
+
ratios=self.ratios[i],
|
448 |
+
center=self.centers[i])
|
449 |
+
indices = list(range(len(self.ratios[i])))
|
450 |
+
indices.insert(1, len(indices))
|
451 |
+
base_anchors = torch.index_select(base_anchors, 0,
|
452 |
+
torch.LongTensor(indices))
|
453 |
+
multi_level_base_anchors.append(base_anchors)
|
454 |
+
return multi_level_base_anchors
|
455 |
+
|
456 |
+
def __repr__(self):
|
457 |
+
"""str: a string that describes the module"""
|
458 |
+
indent_str = ' '
|
459 |
+
repr_str = self.__class__.__name__ + '(\n'
|
460 |
+
repr_str += f'{indent_str}strides={self.strides},\n'
|
461 |
+
repr_str += f'{indent_str}scales={self.scales},\n'
|
462 |
+
repr_str += f'{indent_str}scale_major={self.scale_major},\n'
|
463 |
+
repr_str += f'{indent_str}input_size={self.input_size},\n'
|
464 |
+
repr_str += f'{indent_str}scales={self.scales},\n'
|
465 |
+
repr_str += f'{indent_str}ratios={self.ratios},\n'
|
466 |
+
repr_str += f'{indent_str}num_levels={self.num_levels},\n'
|
467 |
+
repr_str += f'{indent_str}base_sizes={self.base_sizes},\n'
|
468 |
+
repr_str += f'{indent_str}basesize_ratio_range='
|
469 |
+
repr_str += f'{self.basesize_ratio_range})'
|
470 |
+
return repr_str
|
471 |
+
|
472 |
+
|
473 |
+
@ANCHOR_GENERATORS.register_module()
|
474 |
+
class LegacyAnchorGenerator(AnchorGenerator):
|
475 |
+
"""Legacy anchor generator used in MMDetection V1.x.
|
476 |
+
|
477 |
+
Note:
|
478 |
+
Difference to the V2.0 anchor generator:
|
479 |
+
|
480 |
+
1. The center offset of V1.x anchors are set to be 0.5 rather than 0.
|
481 |
+
2. The width/height are minused by 1 when calculating the anchors' \
|
482 |
+
centers and corners to meet the V1.x coordinate system.
|
483 |
+
3. The anchors' corners are quantized.
|
484 |
+
|
485 |
+
Args:
|
486 |
+
strides (list[int] | list[tuple[int]]): Strides of anchors
|
487 |
+
in multiple feature levels.
|
488 |
+
ratios (list[float]): The list of ratios between the height and width
|
489 |
+
of anchors in a single level.
|
490 |
+
scales (list[int] | None): Anchor scales for anchors in a single level.
|
491 |
+
It cannot be set at the same time if `octave_base_scale` and
|
492 |
+
`scales_per_octave` are set.
|
493 |
+
base_sizes (list[int]): The basic sizes of anchors in multiple levels.
|
494 |
+
If None is given, strides will be used to generate base_sizes.
|
495 |
+
scale_major (bool): Whether to multiply scales first when generating
|
496 |
+
base anchors. If true, the anchors in the same row will have the
|
497 |
+
same scales. By default it is True in V2.0
|
498 |
+
octave_base_scale (int): The base scale of octave.
|
499 |
+
scales_per_octave (int): Number of scales for each octave.
|
500 |
+
`octave_base_scale` and `scales_per_octave` are usually used in
|
501 |
+
retinanet and the `scales` should be None when they are set.
|
502 |
+
centers (list[tuple[float, float]] | None): The centers of the anchor
|
503 |
+
relative to the feature grid center in multiple feature levels.
|
504 |
+
By default it is set to be None and not used. It a list of float
|
505 |
+
is given, this list will be used to shift the centers of anchors.
|
506 |
+
center_offset (float): The offset of center in propotion to anchors'
|
507 |
+
width and height. By default it is 0.5 in V2.0 but it should be 0.5
|
508 |
+
in v1.x models.
|
509 |
+
|
510 |
+
Examples:
|
511 |
+
>>> from mmdet.core import LegacyAnchorGenerator
|
512 |
+
>>> self = LegacyAnchorGenerator(
|
513 |
+
>>> [16], [1.], [1.], [9], center_offset=0.5)
|
514 |
+
>>> all_anchors = self.grid_anchors(((2, 2),), device='cpu')
|
515 |
+
>>> print(all_anchors)
|
516 |
+
[tensor([[ 0., 0., 8., 8.],
|
517 |
+
[16., 0., 24., 8.],
|
518 |
+
[ 0., 16., 8., 24.],
|
519 |
+
[16., 16., 24., 24.]])]
|
520 |
+
"""
|
521 |
+
|
522 |
+
def gen_single_level_base_anchors(self,
|
523 |
+
base_size,
|
524 |
+
scales,
|
525 |
+
ratios,
|
526 |
+
center=None):
|
527 |
+
"""Generate base anchors of a single level.
|
528 |
+
|
529 |
+
Note:
|
530 |
+
The width/height of anchors are minused by 1 when calculating \
|
531 |
+
the centers and corners to meet the V1.x coordinate system.
|
532 |
+
|
533 |
+
Args:
|
534 |
+
base_size (int | float): Basic size of an anchor.
|
535 |
+
scales (torch.Tensor): Scales of the anchor.
|
536 |
+
ratios (torch.Tensor): The ratio between between the height.
|
537 |
+
and width of anchors in a single level.
|
538 |
+
center (tuple[float], optional): The center of the base anchor
|
539 |
+
related to a single feature grid. Defaults to None.
|
540 |
+
|
541 |
+
Returns:
|
542 |
+
torch.Tensor: Anchors in a single-level feature map.
|
543 |
+
"""
|
544 |
+
w = base_size
|
545 |
+
h = base_size
|
546 |
+
if center is None:
|
547 |
+
x_center = self.center_offset * (w - 1)
|
548 |
+
y_center = self.center_offset * (h - 1)
|
549 |
+
else:
|
550 |
+
x_center, y_center = center
|
551 |
+
|
552 |
+
h_ratios = torch.sqrt(ratios)
|
553 |
+
w_ratios = 1 / h_ratios
|
554 |
+
if self.scale_major:
|
555 |
+
ws = (w * w_ratios[:, None] * scales[None, :]).view(-1)
|
556 |
+
hs = (h * h_ratios[:, None] * scales[None, :]).view(-1)
|
557 |
+
else:
|
558 |
+
ws = (w * scales[:, None] * w_ratios[None, :]).view(-1)
|
559 |
+
hs = (h * scales[:, None] * h_ratios[None, :]).view(-1)
|
560 |
+
|
561 |
+
# use float anchor and the anchor's center is aligned with the
|
562 |
+
# pixel center
|
563 |
+
base_anchors = [
|
564 |
+
x_center - 0.5 * (ws - 1), y_center - 0.5 * (hs - 1),
|
565 |
+
x_center + 0.5 * (ws - 1), y_center + 0.5 * (hs - 1)
|
566 |
+
]
|
567 |
+
base_anchors = torch.stack(base_anchors, dim=-1).round()
|
568 |
+
|
569 |
+
return base_anchors
|
570 |
+
|
571 |
+
|
572 |
+
@ANCHOR_GENERATORS.register_module()
|
573 |
+
class LegacySSDAnchorGenerator(SSDAnchorGenerator, LegacyAnchorGenerator):
|
574 |
+
"""Legacy anchor generator used in MMDetection V1.x.
|
575 |
+
|
576 |
+
The difference between `LegacySSDAnchorGenerator` and `SSDAnchorGenerator`
|
577 |
+
can be found in `LegacyAnchorGenerator`.
|
578 |
+
"""
|
579 |
+
|
580 |
+
def __init__(self,
|
581 |
+
strides,
|
582 |
+
ratios,
|
583 |
+
basesize_ratio_range,
|
584 |
+
input_size=300,
|
585 |
+
scale_major=True):
|
586 |
+
super(LegacySSDAnchorGenerator,
|
587 |
+
self).__init__(strides, ratios, basesize_ratio_range, input_size,
|
588 |
+
scale_major)
|
589 |
+
self.centers = [((stride - 1) / 2., (stride - 1) / 2.)
|
590 |
+
for stride in strides]
|
591 |
+
self.base_anchors = self.gen_base_anchors()
|
592 |
+
|
593 |
+
|
594 |
+
@ANCHOR_GENERATORS.register_module()
|
595 |
+
class YOLOAnchorGenerator(AnchorGenerator):
|
596 |
+
"""Anchor generator for YOLO.
|
597 |
+
|
598 |
+
Args:
|
599 |
+
strides (list[int] | list[tuple[int, int]]): Strides of anchors
|
600 |
+
in multiple feature levels.
|
601 |
+
base_sizes (list[list[tuple[int, int]]]): The basic sizes
|
602 |
+
of anchors in multiple levels.
|
603 |
+
"""
|
604 |
+
|
605 |
+
def __init__(self, strides, base_sizes):
|
606 |
+
self.strides = [_pair(stride) for stride in strides]
|
607 |
+
self.centers = [(stride[0] / 2., stride[1] / 2.)
|
608 |
+
for stride in self.strides]
|
609 |
+
self.base_sizes = []
|
610 |
+
num_anchor_per_level = len(base_sizes[0])
|
611 |
+
for base_sizes_per_level in base_sizes:
|
612 |
+
assert num_anchor_per_level == len(base_sizes_per_level)
|
613 |
+
self.base_sizes.append(
|
614 |
+
                [_pair(base_size) for base_size in base_sizes_per_level])
        self.base_anchors = self.gen_base_anchors()

    @property
    def num_levels(self):
        """int: number of feature levels that the generator will be applied"""
        return len(self.base_sizes)

    def gen_base_anchors(self):
        """Generate base anchors.

        Returns:
            list(torch.Tensor): Base anchors of a feature grid in multiple \
                feature levels.
        """
        multi_level_base_anchors = []
        for i, base_sizes_per_level in enumerate(self.base_sizes):
            center = None
            if self.centers is not None:
                center = self.centers[i]
            multi_level_base_anchors.append(
                self.gen_single_level_base_anchors(base_sizes_per_level,
                                                   center))
        return multi_level_base_anchors

    def gen_single_level_base_anchors(self, base_sizes_per_level, center=None):
        """Generate base anchors of a single level.

        Args:
            base_sizes_per_level (list[tuple[int, int]]): Basic sizes of
                anchors.
            center (tuple[float], optional): The center of the base anchor
                related to a single feature grid. Defaults to None.

        Returns:
            torch.Tensor: Anchors in a single-level feature maps.
        """
        x_center, y_center = center
        base_anchors = []
        for base_size in base_sizes_per_level:
            w, h = base_size

            # use float anchor and the anchor's center is aligned with the
            # pixel center
            base_anchor = torch.Tensor([
                x_center - 0.5 * w, y_center - 0.5 * h, x_center + 0.5 * w,
                y_center + 0.5 * h
            ])
            base_anchors.append(base_anchor)
        base_anchors = torch.stack(base_anchors, dim=0)

        return base_anchors

    def responsible_flags(self, featmap_sizes, gt_bboxes, device='cuda'):
        """Generate responsible anchor flags of grid cells in multiple scales.

        Args:
            featmap_sizes (list(tuple)): List of feature map sizes in multiple
                feature levels.
            gt_bboxes (Tensor): Ground truth boxes, shape (n, 4).
            device (str): Device where the anchors will be put on.

        Return:
            list(torch.Tensor): responsible flags of anchors in multiple level
        """
        assert self.num_levels == len(featmap_sizes)
        multi_level_responsible_flags = []
        for i in range(self.num_levels):
            anchor_stride = self.strides[i]
            flags = self.single_level_responsible_flags(
                featmap_sizes[i],
                gt_bboxes,
                anchor_stride,
                self.num_base_anchors[i],
                device=device)
            multi_level_responsible_flags.append(flags)
        return multi_level_responsible_flags

    def single_level_responsible_flags(self,
                                       featmap_size,
                                       gt_bboxes,
                                       stride,
                                       num_base_anchors,
                                       device='cuda'):
        """Generate the responsible flags of anchor in a single feature map.

        Args:
            featmap_size (tuple[int]): The size of feature maps.
            gt_bboxes (Tensor): Ground truth boxes, shape (n, 4).
            stride (tuple(int)): stride of current level
            num_base_anchors (int): The number of base anchors.
            device (str, optional): Device where the flags will be put on.
                Defaults to 'cuda'.

        Returns:
            torch.Tensor: The valid flags of each anchor in a single level \
                feature map.
        """
        feat_h, feat_w = featmap_size
        gt_bboxes_cx = ((gt_bboxes[:, 0] + gt_bboxes[:, 2]) * 0.5).to(device)
        gt_bboxes_cy = ((gt_bboxes[:, 1] + gt_bboxes[:, 3]) * 0.5).to(device)
        gt_bboxes_grid_x = torch.floor(gt_bboxes_cx / stride[0]).long()
        gt_bboxes_grid_y = torch.floor(gt_bboxes_cy / stride[1]).long()

        # row major indexing
        gt_bboxes_grid_idx = gt_bboxes_grid_y * feat_w + gt_bboxes_grid_x

        responsible_grid = torch.zeros(
            feat_h * feat_w, dtype=torch.uint8, device=device)
        responsible_grid[gt_bboxes_grid_idx] = 1

        responsible_grid = responsible_grid[:, None].expand(
            responsible_grid.size(0), num_base_anchors).contiguous().view(-1)
        return responsible_grid
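Note (not part of the diff): a minimal sketch of how the responsible-flag logic above is typically exercised. It assumes the standard YOLOv3 strides and base sizes purely as illustrative values, and that the registry helpers below are re-exported from mmdet.core.anchor as in upstream mmdetection.

import torch
from mmdet.core.anchor import build_anchor_generator

# Build the generator through the registry; values are illustrative.
anchor_generator = build_anchor_generator(
    dict(
        type='YOLOAnchorGenerator',
        strides=[32, 16, 8],
        base_sizes=[[(116, 90), (156, 198), (373, 326)],
                    [(30, 61), (62, 45), (59, 119)],
                    [(10, 13), (16, 30), (33, 23)]]))
gt_bboxes = torch.Tensor([[10., 10., 100., 100.]])
flags = anchor_generator.responsible_flags(
    featmap_sizes=[(13, 13), (26, 26), (52, 52)],
    gt_bboxes=gt_bboxes,
    device='cpu')
# One flag per (cell, base anchor) pair at each level; only the cell that
# contains the gt center is marked responsible.
print([f.shape for f in flags])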
mmdet/core/anchor/builder.py
ADDED
@@ -0,0 +1,7 @@
from mmcv.utils import Registry, build_from_cfg

ANCHOR_GENERATORS = Registry('Anchor generator')


def build_anchor_generator(cfg, default_args=None):
    return build_from_cfg(cfg, ANCHOR_GENERATORS, default_args)
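Note (not part of the diff): a short sketch of what this registry enables. The class below is hypothetical; registering it lets a config refer to it by `type='MyAnchorGenerator'`.

from mmdet.core.anchor.builder import ANCHOR_GENERATORS, build_anchor_generator

@ANCHOR_GENERATORS.register_module()
class MyAnchorGenerator:
    # Hypothetical generator used only to illustrate registration.
    def __init__(self, strides):
        self.strides = strides

my_gen = build_anchor_generator(dict(type='MyAnchorGenerator', strides=[8, 16]))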
mmdet/core/anchor/point_generator.py
ADDED
@@ -0,0 +1,37 @@
import torch

from .builder import ANCHOR_GENERATORS


@ANCHOR_GENERATORS.register_module()
class PointGenerator(object):

    def _meshgrid(self, x, y, row_major=True):
        xx = x.repeat(len(y))
        yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
        if row_major:
            return xx, yy
        else:
            return yy, xx

    def grid_points(self, featmap_size, stride=16, device='cuda'):
        feat_h, feat_w = featmap_size
        shift_x = torch.arange(0., feat_w, device=device) * stride
        shift_y = torch.arange(0., feat_h, device=device) * stride
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        stride = shift_x.new_full((shift_xx.shape[0], ), stride)
        shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)
        all_points = shifts.to(device)
        return all_points

    def valid_flags(self, featmap_size, valid_size, device='cuda'):
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
        valid = valid_xx & valid_yy
        return valid
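Note (not part of the diff): a minimal usage sketch. grid_points returns one (x, y, stride) triple per feature-map cell in row-major order, and valid_flags masks cells outside the un-padded region; the sizes used here are illustrative.

import torch
from mmdet.core.anchor import PointGenerator

pg = PointGenerator()
points = pg.grid_points((2, 3), stride=16, device='cpu')            # shape (6, 3)
flags = pg.valid_flags((2, 3), valid_size=(2, 2), device='cpu')     # 4 of 6 True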
mmdet/core/anchor/utils.py
ADDED
@@ -0,0 +1,71 @@
import torch


def images_to_levels(target, num_levels):
    """Convert targets by image to targets by feature level.

    [target_img0, target_img1] -> [target_level0, target_level1, ...]
    """
    target = torch.stack(target, 0)
    level_targets = []
    start = 0
    for n in num_levels:
        end = start + n
        # level_targets.append(target[:, start:end].squeeze(0))
        level_targets.append(target[:, start:end])
        start = end
    return level_targets


def anchor_inside_flags(flat_anchors,
                        valid_flags,
                        img_shape,
                        allowed_border=0):
    """Check whether the anchors are inside the border.

    Args:
        flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4).
        valid_flags (torch.Tensor): An existing valid flags of anchors.
        img_shape (tuple(int)): Shape of current image.
        allowed_border (int, optional): The border to allow the valid anchor.
            Defaults to 0.

    Returns:
        torch.Tensor: Flags indicating whether the anchors are inside a \
            valid range.
    """
    img_h, img_w = img_shape[:2]
    if allowed_border >= 0:
        inside_flags = valid_flags & \
            (flat_anchors[:, 0] >= -allowed_border) & \
            (flat_anchors[:, 1] >= -allowed_border) & \
            (flat_anchors[:, 2] < img_w + allowed_border) & \
            (flat_anchors[:, 3] < img_h + allowed_border)
    else:
        inside_flags = valid_flags
    return inside_flags


def calc_region(bbox, ratio, featmap_size=None):
    """Calculate a proportional bbox region.

    The bbox center are fixed and the new h' and w' is h * ratio and w * ratio.

    Args:
        bbox (Tensor): Bboxes to calculate regions, shape (n, 4).
        ratio (float): Ratio of the output region.
        featmap_size (tuple): Feature map size used for clipping the boundary.

    Returns:
        tuple: x1, y1, x2, y2
    """
    x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long()
    y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long()
    x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long()
    y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long()
    if featmap_size is not None:
        x1 = x1.clamp(min=0, max=featmap_size[1])
        y1 = y1.clamp(min=0, max=featmap_size[0])
        x2 = x2.clamp(min=0, max=featmap_size[1])
        y2 = y2.clamp(min=0, max=featmap_size[0])
    return (x1, y1, x2, y2)
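Note (not part of the diff): a worked example for calc_region. With ratio=0.2 each corner is interpolated 20% of the way toward the opposite one, so the center is preserved while the extent shrinks; the bbox values are illustrative.

import torch
from mmdet.core.anchor.utils import calc_region

bbox = torch.Tensor([0., 0., 100., 100.])
x1, y1, x2, y2 = calc_region(bbox, ratio=0.2)
# x1 = round(0.8*0 + 0.2*100) = 20, x2 = round(0.2*0 + 0.8*100) = 80,
# so the result is the central (20, 20, 80, 80) sub-box.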
mmdet/core/bbox/__init__.py
ADDED
@@ -0,0 +1,27 @@
from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner,
                        MaxIoUAssigner, RegionAssigner)
from .builder import build_assigner, build_bbox_coder, build_sampler
from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder,
                    TBLRBBoxCoder)
from .iou_calculators import BboxOverlaps2D, bbox_overlaps
from .samplers import (BaseSampler, CombinedSampler,
                       InstanceBalancedPosSampler, IoUBalancedNegSampler,
                       OHEMSampler, PseudoSampler, RandomSampler,
                       SamplingResult, ScoreHLRSampler)
from .transforms import (bbox2distance, bbox2result, bbox2roi,
                         bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping,
                         bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh,
                         distance2bbox, roi2bbox)

__all__ = [
    'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner',
    'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler',
    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
    'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner',
    'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back',
    'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance',
    'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder',
    'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'CenterRegionAssigner',
    'bbox_rescale', 'bbox_cxcywh_to_xyxy', 'bbox_xyxy_to_cxcywh',
    'RegionAssigner'
]
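Note (not part of the diff): a sketch of how the exported builders are used so a config can pick an assigner/sampler by name. The thresholds below follow the common Faster R-CNN RPN setting and are only illustrative.

from mmdet.core.bbox import build_assigner, build_sampler

assigner = build_assigner(
    dict(type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3,
         min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1))
sampler = build_sampler(
    dict(type='RandomSampler', num=256, pos_fraction=0.5,
         neg_pos_ub=-1, add_gt_as_proposals=False))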
mmdet/core/bbox/assigners/__init__.py
ADDED
@@ -0,0 +1,16 @@
from .approx_max_iou_assigner import ApproxMaxIoUAssigner
from .assign_result import AssignResult
from .atss_assigner import ATSSAssigner
from .base_assigner import BaseAssigner
from .center_region_assigner import CenterRegionAssigner
from .grid_assigner import GridAssigner
from .hungarian_assigner import HungarianAssigner
from .max_iou_assigner import MaxIoUAssigner
from .point_assigner import PointAssigner
from .region_assigner import RegionAssigner

__all__ = [
    'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',
    'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner', 'GridAssigner',
    'HungarianAssigner', 'RegionAssigner'
]
mmdet/core/bbox/assigners/approx_max_iou_assigner.py
ADDED
@@ -0,0 +1,145 @@
import torch

from ..builder import BBOX_ASSIGNERS
from ..iou_calculators import build_iou_calculator
from .max_iou_assigner import MaxIoUAssigner


@BBOX_ASSIGNERS.register_module()
class ApproxMaxIoUAssigner(MaxIoUAssigner):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal will be assigned with an integer indicating the ground truth
    index. (semi-positive index: gt label (0-based), -1: background)

    - -1: negative sample, no assigned gt
    - semi-positive integer: positive sample, index (0-based) of assigned gt

    Args:
        pos_iou_thr (float): IoU threshold for positive bboxes.
        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
        min_pos_iou (float): Minimum iou for a bbox to be considered as a
            positive bbox. Positive samples can have smaller IoU than
            pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
        gt_max_assign_all (bool): Whether to assign all bboxes with the same
            highest overlap with some gt to that gt.
        ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
            `gt_bboxes_ignore` is specified). Negative values mean not
            ignoring any bboxes.
        ignore_wrt_candidates (bool): Whether to compute the iof between
            `bboxes` and `gt_bboxes_ignore`, or the contrary.
        match_low_quality (bool): Whether to allow low quality matches. This is
            usually allowed for RPN and single stage detectors, but not allowed
            in the second stage.
        gpu_assign_thr (int): The upper bound of the number of GT for GPU
            assign. When the number of gt is above this threshold, will assign
            on CPU device. Negative values mean not assign on CPU.
    """

    def __init__(self,
                 pos_iou_thr,
                 neg_iou_thr,
                 min_pos_iou=.0,
                 gt_max_assign_all=True,
                 ignore_iof_thr=-1,
                 ignore_wrt_candidates=True,
                 match_low_quality=True,
                 gpu_assign_thr=-1,
                 iou_calculator=dict(type='BboxOverlaps2D')):
        self.pos_iou_thr = pos_iou_thr
        self.neg_iou_thr = neg_iou_thr
        self.min_pos_iou = min_pos_iou
        self.gt_max_assign_all = gt_max_assign_all
        self.ignore_iof_thr = ignore_iof_thr
        self.ignore_wrt_candidates = ignore_wrt_candidates
        self.gpu_assign_thr = gpu_assign_thr
        self.match_low_quality = match_low_quality
        self.iou_calculator = build_iou_calculator(iou_calculator)

    def assign(self,
               approxs,
               squares,
               approxs_per_octave,
               gt_bboxes,
               gt_bboxes_ignore=None,
               gt_labels=None):
        """Assign gt to approxs.

        This method assigns a gt bbox to each group of approxs (bboxes);
        each group of approxs is represented by a base approx (bbox) and
        will be assigned with -1, or a semi-positive number.
        background_label (-1) means negative sample,
        semi-positive number is the index (0-based) of assigned gt.
        The assignment is done in following steps, the order matters.

        1. assign every bbox to background_label (-1)
        2. use the max IoU of each group of approxs to assign
        3. assign proposals whose iou with all gts < neg_iou_thr to background
        4. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that bbox
        5. for each gt bbox, assign its nearest proposals (may be more than
           one) to itself

        Args:
            approxs (Tensor): Bounding boxes to be assigned,
                shape(approxs_per_octave*n, 4).
            squares (Tensor): Base Bounding boxes to be assigned,
                shape(n, 4).
            approxs_per_octave (int): number of approxs per octave
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        num_squares = squares.size(0)
        num_gts = gt_bboxes.size(0)

        if num_squares == 0 or num_gts == 0:
            # No predictions and/or truth, return empty assignment
            overlaps = approxs.new(num_gts, num_squares)
            assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
            return assign_result

        # re-organize anchors by approxs_per_octave x num_squares
        approxs = torch.transpose(
            approxs.view(num_squares, approxs_per_octave, 4), 0,
            1).contiguous().view(-1, 4)
        assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
            num_gts > self.gpu_assign_thr) else False
        # compute overlap and assign gt on CPU when number of GT is large
        if assign_on_cpu:
            device = approxs.device
            approxs = approxs.cpu()
            gt_bboxes = gt_bboxes.cpu()
            if gt_bboxes_ignore is not None:
                gt_bboxes_ignore = gt_bboxes_ignore.cpu()
            if gt_labels is not None:
                gt_labels = gt_labels.cpu()
        all_overlaps = self.iou_calculator(approxs, gt_bboxes)

        overlaps, _ = all_overlaps.view(approxs_per_octave, num_squares,
                                        num_gts).max(dim=0)
        overlaps = torch.transpose(overlaps, 0, 1)

        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None
                and gt_bboxes_ignore.numel() > 0 and squares.numel() > 0):
            if self.ignore_wrt_candidates:
                ignore_overlaps = self.iou_calculator(
                    squares, gt_bboxes_ignore, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            else:
                ignore_overlaps = self.iou_calculator(
                    gt_bboxes_ignore, squares, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1

        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
        if assign_on_cpu:
            assign_result.gt_inds = assign_result.gt_inds.to(device)
            assign_result.max_overlaps = assign_result.max_overlaps.to(device)
            if assign_result.labels is not None:
                assign_result.labels = assign_result.labels.to(device)
        return assign_result
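Note (not part of the diff): a small torch-only sketch of the re-organisation step above. The approxs arrive grouped per square and the transpose/view puts all approxs at the same octave position together before the IoU computation; shapes are illustrative.

import torch

num_squares, approxs_per_octave = 3, 2
approxs = torch.arange(num_squares * approxs_per_octave * 4,
                       dtype=torch.float32).view(-1, 4)
regrouped = torch.transpose(
    approxs.view(num_squares, approxs_per_octave, 4), 0,
    1).contiguous().view(-1, 4)
# regrouped[:num_squares] now holds the first approx of every square,
# regrouped[num_squares:] the second, matching the later
# view(approxs_per_octave, num_squares, num_gts).max(dim=0) reduction.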
mmdet/core/bbox/assigners/assign_result.py
ADDED
@@ -0,0 +1,204 @@
import torch

from mmdet.utils import util_mixins


class AssignResult(util_mixins.NiceRepr):
    """Stores assignments between predicted and truth boxes.

    Attributes:
        num_gts (int): the number of truth boxes considered when computing this
            assignment

        gt_inds (LongTensor): for each predicted box indicates the 1-based
            index of the assigned truth box. 0 means unassigned and -1 means
            ignore.

        max_overlaps (FloatTensor): the iou between the predicted box and its
            assigned truth box.

        labels (None | LongTensor): If specified, for each predicted box
            indicates the category label of the assigned truth box.

    Example:
        >>> # An assign result between 4 predicted boxes and 9 true boxes
        >>> # where only two boxes were assigned.
        >>> num_gts = 9
        >>> max_overlaps = torch.LongTensor([0, .5, .9, 0])
        >>> gt_inds = torch.LongTensor([-1, 1, 2, 0])
        >>> labels = torch.LongTensor([0, 3, 4, 0])
        >>> self = AssignResult(num_gts, gt_inds, max_overlaps, labels)
        >>> print(str(self))  # xdoctest: +IGNORE_WANT
        <AssignResult(num_gts=9, gt_inds.shape=(4,), max_overlaps.shape=(4,),
                      labels.shape=(4,))>
        >>> # Force addition of gt labels (when adding gt as proposals)
        >>> new_labels = torch.LongTensor([3, 4, 5])
        >>> self.add_gt_(new_labels)
        >>> print(str(self))  # xdoctest: +IGNORE_WANT
        <AssignResult(num_gts=9, gt_inds.shape=(7,), max_overlaps.shape=(7,),
                      labels.shape=(7,))>
    """

    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
        self.num_gts = num_gts
        self.gt_inds = gt_inds
        self.max_overlaps = max_overlaps
        self.labels = labels
        # Interface for possible user-defined properties
        self._extra_properties = {}

    @property
    def num_preds(self):
        """int: the number of predictions in this assignment"""
        return len(self.gt_inds)

    def set_extra_property(self, key, value):
        """Set user-defined new property."""
        assert key not in self.info
        self._extra_properties[key] = value

    def get_extra_property(self, key):
        """Get user-defined property."""
        return self._extra_properties.get(key, None)

    @property
    def info(self):
        """dict: a dictionary of info about the object"""
        basic_info = {
            'num_gts': self.num_gts,
            'num_preds': self.num_preds,
            'gt_inds': self.gt_inds,
            'max_overlaps': self.max_overlaps,
            'labels': self.labels,
        }
        basic_info.update(self._extra_properties)
        return basic_info

    def __nice__(self):
        """str: a "nice" summary string describing this assign result"""
        parts = []
        parts.append(f'num_gts={self.num_gts!r}')
        if self.gt_inds is None:
            parts.append(f'gt_inds={self.gt_inds!r}')
        else:
            parts.append(f'gt_inds.shape={tuple(self.gt_inds.shape)!r}')
        if self.max_overlaps is None:
            parts.append(f'max_overlaps={self.max_overlaps!r}')
        else:
            parts.append('max_overlaps.shape='
                         f'{tuple(self.max_overlaps.shape)!r}')
        if self.labels is None:
            parts.append(f'labels={self.labels!r}')
        else:
            parts.append(f'labels.shape={tuple(self.labels.shape)!r}')
        return ', '.join(parts)

    @classmethod
    def random(cls, **kwargs):
        """Create random AssignResult for tests or debugging.

        Args:
            num_preds: number of predicted boxes
            num_gts: number of true boxes
            p_ignore (float): probability of a predicted box assigned to an
                ignored truth
            p_assigned (float): probability of a predicted box not being
                assigned
            p_use_label (float | bool): with labels or not
            rng (None | int | numpy.random.RandomState): seed or state

        Returns:
            :obj:`AssignResult`: Randomly generated assign results.

        Example:
            >>> from mmdet.core.bbox.assigners.assign_result import *  # NOQA
            >>> self = AssignResult.random()
            >>> print(self.info)
        """
        from mmdet.core.bbox import demodata
        rng = demodata.ensure_rng(kwargs.get('rng', None))

        num_gts = kwargs.get('num_gts', None)
        num_preds = kwargs.get('num_preds', None)
        p_ignore = kwargs.get('p_ignore', 0.3)
        p_assigned = kwargs.get('p_assigned', 0.7)
        p_use_label = kwargs.get('p_use_label', 0.5)
        num_classes = kwargs.get('p_use_label', 3)

        if num_gts is None:
            num_gts = rng.randint(0, 8)
        if num_preds is None:
            num_preds = rng.randint(0, 16)

        if num_gts == 0:
            max_overlaps = torch.zeros(num_preds, dtype=torch.float32)
            gt_inds = torch.zeros(num_preds, dtype=torch.int64)
            if p_use_label is True or p_use_label < rng.rand():
                labels = torch.zeros(num_preds, dtype=torch.int64)
            else:
                labels = None
        else:
            import numpy as np
            # Create an overlap for each predicted box
            max_overlaps = torch.from_numpy(rng.rand(num_preds))

            # Construct gt_inds for each predicted box
            is_assigned = torch.from_numpy(rng.rand(num_preds) < p_assigned)
            # maximum number of assignments constraints
            n_assigned = min(num_preds, min(num_gts, is_assigned.sum()))

            assigned_idxs = np.where(is_assigned)[0]
            rng.shuffle(assigned_idxs)
            assigned_idxs = assigned_idxs[0:n_assigned]
            assigned_idxs.sort()

            is_assigned[:] = 0
            is_assigned[assigned_idxs] = True

            is_ignore = torch.from_numpy(
                rng.rand(num_preds) < p_ignore) & is_assigned

            gt_inds = torch.zeros(num_preds, dtype=torch.int64)

            true_idxs = np.arange(num_gts)
            rng.shuffle(true_idxs)
            true_idxs = torch.from_numpy(true_idxs)
            gt_inds[is_assigned] = true_idxs[:n_assigned]

            gt_inds = torch.from_numpy(
                rng.randint(1, num_gts + 1, size=num_preds))
            gt_inds[is_ignore] = -1
            gt_inds[~is_assigned] = 0
            max_overlaps[~is_assigned] = 0

            if p_use_label is True or p_use_label < rng.rand():
                if num_classes == 0:
                    labels = torch.zeros(num_preds, dtype=torch.int64)
                else:
                    labels = torch.from_numpy(
                        # remind that we set FG labels to [0, num_class-1]
                        # since mmdet v2.0
                        # BG cat_id: num_class
                        rng.randint(0, num_classes, size=num_preds))
                    labels[~is_assigned] = 0
            else:
                labels = None

        self = cls(num_gts, gt_inds, max_overlaps, labels)
        return self

    def add_gt_(self, gt_labels):
        """Add ground truth as assigned results.

        Args:
            gt_labels (torch.Tensor): Labels of gt boxes
        """
        self_inds = torch.arange(
            1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
        self.gt_inds = torch.cat([self_inds, self.gt_inds])

        self.max_overlaps = torch.cat(
            [self.max_overlaps.new_ones(len(gt_labels)), self.max_overlaps])

        if self.labels is not None:
            self.labels = torch.cat([gt_labels, self.labels])
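Note (not part of the diff): a hand-built AssignResult and the effect of add_gt_ when gt boxes are appended in front of the proposal list; all tensor values are illustrative.

import torch
from mmdet.core.bbox import AssignResult

gt_inds = torch.LongTensor([0, 1, 2, 0])         # 0 = unassigned
max_overlaps = torch.Tensor([0., .5, .9, 0.])
labels = torch.LongTensor([0, 3, 4, 0])
result = AssignResult(num_gts=2, gt_inds=gt_inds,
                      max_overlaps=max_overlaps, labels=labels)
result.add_gt_(torch.LongTensor([3, 4]))          # prepends one entry per gt
assert result.num_preds == 6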
mmdet/core/bbox/assigners/atss_assigner.py
ADDED
@@ -0,0 +1,178 @@
import torch

from ..builder import BBOX_ASSIGNERS
from ..iou_calculators import build_iou_calculator
from .assign_result import AssignResult
from .base_assigner import BaseAssigner


@BBOX_ASSIGNERS.register_module()
class ATSSAssigner(BaseAssigner):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal will be assigned with `0` or a positive integer
    indicating the ground truth index.

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        topk (float): number of bbox selected in each level
    """

    def __init__(self,
                 topk,
                 iou_calculator=dict(type='BboxOverlaps2D'),
                 ignore_iof_thr=-1):
        self.topk = topk
        self.iou_calculator = build_iou_calculator(iou_calculator)
        self.ignore_iof_thr = ignore_iof_thr

    # https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py

    def assign(self,
               bboxes,
               num_level_bboxes,
               gt_bboxes,
               gt_bboxes_ignore=None,
               gt_labels=None):
        """Assign gt to bboxes.

        The assignment is done in following steps

        1. compute iou between all bbox (bbox of all pyramid levels) and gt
        2. compute center distance between all bbox and gt
        3. on each pyramid level, for each gt, select k bbox whose center
           are closest to the gt center, so we select k*l bbox in total as
           candidates for each gt
        4. get corresponding iou for these candidates, and compute the
           mean and std, set mean + std as the iou threshold
        5. select these candidates whose iou are greater than or equal to
           the threshold as positive
        6. limit the positive sample's center in gt


        Args:
            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
            num_level_bboxes (List): num of bboxes in each level
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        INF = 100000000
        bboxes = bboxes[:, :4]
        num_gt, num_bboxes = gt_bboxes.size(0), bboxes.size(0)

        # compute iou between all bbox and gt
        overlaps = self.iou_calculator(bboxes, gt_bboxes)

        # assign 0 by default
        assigned_gt_inds = overlaps.new_full((num_bboxes, ),
                                             0,
                                             dtype=torch.long)

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = overlaps.new_zeros((num_bboxes, ))
            if num_gt == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if gt_labels is None:
                assigned_labels = None
            else:
                assigned_labels = overlaps.new_full((num_bboxes, ),
                                                    -1,
                                                    dtype=torch.long)
            return AssignResult(
                num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)

        # compute center distance between all bbox and gt
        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        gt_points = torch.stack((gt_cx, gt_cy), dim=1)

        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
        bboxes_points = torch.stack((bboxes_cx, bboxes_cy), dim=1)

        distances = (bboxes_points[:, None, :] -
                     gt_points[None, :, :]).pow(2).sum(-1).sqrt()

        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None
                and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0):
            ignore_overlaps = self.iou_calculator(
                bboxes, gt_bboxes_ignore, mode='iof')
            ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            ignore_idxs = ignore_max_overlaps > self.ignore_iof_thr
            distances[ignore_idxs, :] = INF
            assigned_gt_inds[ignore_idxs] = -1

        # Selecting candidates based on the center distance
        candidate_idxs = []
        start_idx = 0
        for level, bboxes_per_level in enumerate(num_level_bboxes):
            # on each pyramid level, for each gt,
            # select k bbox whose center are closest to the gt center
            end_idx = start_idx + bboxes_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            selectable_k = min(self.topk, bboxes_per_level)
            _, topk_idxs_per_level = distances_per_level.topk(
                selectable_k, dim=0, largest=False)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
        candidate_idxs = torch.cat(candidate_idxs, dim=0)

        # get corresponding iou for these candidates, and compute the
        # mean and std, set mean + std as the iou threshold
        candidate_overlaps = overlaps[candidate_idxs, torch.arange(num_gt)]
        overlaps_mean_per_gt = candidate_overlaps.mean(0)
        overlaps_std_per_gt = candidate_overlaps.std(0)
        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt

        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]

        # limit the positive sample's center in gt
        for gt_idx in range(num_gt):
            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
        ep_bboxes_cx = bboxes_cx.view(1, -1).expand(
            num_gt, num_bboxes).contiguous().view(-1)
        ep_bboxes_cy = bboxes_cy.view(1, -1).expand(
            num_gt, num_bboxes).contiguous().view(-1)
        candidate_idxs = candidate_idxs.view(-1)

        # calculate the left, top, right, bottom distance between positive
        # bbox center and gt side
        l_ = ep_bboxes_cx[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 0]
        t_ = ep_bboxes_cy[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 1]
        r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].view(-1, num_gt)
        b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].view(-1, num_gt)
        is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01
        is_pos = is_pos & is_in_gts

        # if an anchor box is assigned to multiple gts,
        # the one with the highest IoU will be selected.
        overlaps_inf = torch.full_like(overlaps,
                                       -INF).t().contiguous().view(-1)
        index = candidate_idxs.view(-1)[is_pos.view(-1)]
        overlaps_inf[index] = overlaps.t().contiguous().view(-1)[index]
        overlaps_inf = overlaps_inf.view(num_gt, -1).t()

        max_overlaps, argmax_overlaps = overlaps_inf.max(dim=1)
        assigned_gt_inds[
            max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1

        if gt_labels is not None:
            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)
            pos_inds = torch.nonzero(
                assigned_gt_inds > 0, as_tuple=False).squeeze()
            if pos_inds.numel() > 0:
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None
        return AssignResult(
            num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
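Note (not part of the diff): a toy, torch-only illustration of the adaptive threshold above. Per gt, the candidate IoUs are reduced to mean + std, and only candidates at or above that value (and whose center lies inside the gt) remain positive; numbers are illustrative.

import torch

candidate_overlaps = torch.tensor([[0.55, 0.20],
                                   [0.45, 0.30],
                                   [0.35, 0.10]])        # (k*l, num_gt)
thr = candidate_overlaps.mean(0) + candidate_overlaps.std(0)
is_pos = candidate_overlaps >= thr[None, :]
# Each gt column gets its own threshold, so dense levels do not dominate.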
mmdet/core/bbox/assigners/base_assigner.py
ADDED
@@ -0,0 +1,9 @@
from abc import ABCMeta, abstractmethod


class BaseAssigner(metaclass=ABCMeta):
    """Base assigner that assigns boxes to ground truth boxes."""

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign boxes to either a ground truth box or a negative sample."""
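Note (not part of the diff): a hypothetical subclass, only to show the interface. It assumes the BBOX_ASSIGNERS registry lives in mmdet.core.bbox.builder as in upstream mmdetection; the class itself is illustrative and assigns everything to background.

import torch
from mmdet.core.bbox.assigners import AssignResult, BaseAssigner
from mmdet.core.bbox.builder import BBOX_ASSIGNERS

@BBOX_ASSIGNERS.register_module()
class EverythingBackgroundAssigner(BaseAssigner):
    # Hypothetical assigner: every box gets gt index 0 (background).
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        n = bboxes.size(0)
        return AssignResult(gt_bboxes.size(0),
                            torch.zeros(n, dtype=torch.long),
                            bboxes.new_zeros(n), labels=None)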
mmdet/core/bbox/assigners/center_region_assigner.py
ADDED
@@ -0,0 +1,335 @@
import torch

from ..builder import BBOX_ASSIGNERS
from ..iou_calculators import build_iou_calculator
from .assign_result import AssignResult
from .base_assigner import BaseAssigner


def scale_boxes(bboxes, scale):
    """Expand an array of boxes by a given scale.

    Args:
        bboxes (Tensor): Shape (m, 4)
        scale (float): The scale factor of bboxes

    Returns:
        (Tensor): Shape (m, 4). Scaled bboxes
    """
    assert bboxes.size(1) == 4
    w_half = (bboxes[:, 2] - bboxes[:, 0]) * .5
    h_half = (bboxes[:, 3] - bboxes[:, 1]) * .5
    x_c = (bboxes[:, 2] + bboxes[:, 0]) * .5
    y_c = (bboxes[:, 3] + bboxes[:, 1]) * .5

    w_half *= scale
    h_half *= scale

    boxes_scaled = torch.zeros_like(bboxes)
    boxes_scaled[:, 0] = x_c - w_half
    boxes_scaled[:, 2] = x_c + w_half
    boxes_scaled[:, 1] = y_c - h_half
    boxes_scaled[:, 3] = y_c + h_half
    return boxes_scaled


def is_located_in(points, bboxes):
    """Are points located in bboxes.

    Args:
        points (Tensor): Points, shape: (m, 2).
        bboxes (Tensor): Bounding boxes, shape: (n, 4).

    Return:
        Tensor: Flags indicating if points are located in bboxes, shape: (m, n).
    """
    assert points.size(1) == 2
    assert bboxes.size(1) == 4
    return (points[:, 0].unsqueeze(1) > bboxes[:, 0].unsqueeze(0)) & \
           (points[:, 0].unsqueeze(1) < bboxes[:, 2].unsqueeze(0)) & \
           (points[:, 1].unsqueeze(1) > bboxes[:, 1].unsqueeze(0)) & \
           (points[:, 1].unsqueeze(1) < bboxes[:, 3].unsqueeze(0))


def bboxes_area(bboxes):
    """Compute the area of an array of bboxes.

    Args:
        bboxes (Tensor): The coordinates of bboxes. Shape: (m, 4)

    Returns:
        Tensor: Area of the bboxes. Shape: (m, )
    """
    assert bboxes.size(1) == 4
    w = (bboxes[:, 2] - bboxes[:, 0])
    h = (bboxes[:, 3] - bboxes[:, 1])
    areas = w * h
    return areas


@BBOX_ASSIGNERS.register_module()
class CenterRegionAssigner(BaseAssigner):
    """Assign pixels at the center region of a bbox as positive.

    Each proposal will be assigned with `-1`, `0`, or a positive integer
    indicating the ground truth index.
    - -1: negative samples
    - semi-positive numbers: positive sample, index (0-based) of assigned gt

    Args:
        pos_scale (float): Threshold within which pixels are
            labelled as positive.
        neg_scale (float): Threshold above which pixels are
            labelled as negative.
        min_pos_iof (float): Minimum iof of a pixel with a gt to be
            labelled as positive. Default: 1e-2
        ignore_gt_scale (float): Threshold within which the pixels
            are ignored when the gt is labelled as shadowed. Default: 0.5
        foreground_dominate (bool): If True, the bbox will be assigned as
            positive when a gt's kernel region overlaps with another's shadowed
            (ignored) region, otherwise it is set as ignored. Default to False.
    """

    def __init__(self,
                 pos_scale,
                 neg_scale,
                 min_pos_iof=1e-2,
                 ignore_gt_scale=0.5,
                 foreground_dominate=False,
                 iou_calculator=dict(type='BboxOverlaps2D')):
        self.pos_scale = pos_scale
        self.neg_scale = neg_scale
        self.min_pos_iof = min_pos_iof
        self.ignore_gt_scale = ignore_gt_scale
        self.foreground_dominate = foreground_dominate
        self.iou_calculator = build_iou_calculator(iou_calculator)

    def get_gt_priorities(self, gt_bboxes):
        """Get gt priorities according to their areas.

        Smaller gt has higher priority.

        Args:
            gt_bboxes (Tensor): Ground truth boxes, shape (k, 4).

        Returns:
            Tensor: The priority of gts so that gts with larger priority is \
                more likely to be assigned. Shape (k, )
        """
        gt_areas = bboxes_area(gt_bboxes)
        # Rank all gt bbox areas. Smaller objects have larger priority
        _, sort_idx = gt_areas.sort(descending=True)
        sort_idx = sort_idx.argsort()
        return sort_idx

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign gt to bboxes.

        This method assigns gts to every bbox (proposal/anchor); each bbox
        will be assigned with -1, or a semi-positive number. -1 means
        negative sample, semi-positive number is the index (0-based) of
        assigned gt.

        Args:
            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (tensor, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (tensor, optional): Label of gt_bboxes, shape (num_gts,).

        Returns:
            :obj:`AssignResult`: The assigned result. Note that \
                shadowed_labels of shape (N, 2) is also added as an \
                `assign_result` attribute. `shadowed_labels` is a tensor \
                composed of N pairs of [anchor_ind, class_label], where N \
                is the number of anchors that lie in the outer region of a \
                gt, anchor_ind is the shadowed anchor index and class_label \
                is the shadowed class label.

        Example:
            >>> self = CenterRegionAssigner(0.2, 0.2)
            >>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]])
            >>> gt_bboxes = torch.Tensor([[0, 0, 10, 10]])
            >>> assign_result = self.assign(bboxes, gt_bboxes)
            >>> expected_gt_inds = torch.LongTensor([1, 0])
            >>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
        """
        # There are in total 5 steps in the pixel assignment
        # 1. Find core (the center region, say inner 0.2)
        #    and shadow (the relatively outer part, say inner 0.2-0.5)
        #    regions of every gt.
        # 2. Find all prior bboxes that lie in gt_core and gt_shadow regions
        # 3. Assign prior bboxes in gt_core with a one-hot id of the gt in
        #    the image.
        #    3.1. For overlapping objects, the prior bboxes in gt_core is
        #         assigned with the object with smallest area
        # 4. Assign prior bboxes with class label according to its gt id.
        #    4.1. Assign -1 to prior bboxes lying in shadowed gts
        #    4.2. Assign positive prior boxes with the corresponding label
        # 5. Find pixels lying in the shadow of an object and assign them with
        #    background label, but set the loss weight of its corresponding
        #    gt to zero.
        assert bboxes.size(1) == 4, 'bboxes must have size of 4'
        # 1. Find core positive and shadow region of every gt
        gt_core = scale_boxes(gt_bboxes, self.pos_scale)
        gt_shadow = scale_boxes(gt_bboxes, self.neg_scale)

        # 2. Find prior bboxes that lie in gt_core and gt_shadow regions
        bbox_centers = (bboxes[:, 2:4] + bboxes[:, 0:2]) / 2
        # The center points lie within the gt boxes
        is_bbox_in_gt = is_located_in(bbox_centers, gt_bboxes)
        # Only calculate bbox and gt_core IoF. This enables small prior bboxes
        #   to match large gts
        bbox_and_gt_core_overlaps = self.iou_calculator(
            bboxes, gt_core, mode='iof')
        # The center point of effective priors should be within the gt box
        is_bbox_in_gt_core = is_bbox_in_gt & (
            bbox_and_gt_core_overlaps > self.min_pos_iof)  # shape (n, k)

        is_bbox_in_gt_shadow = (
            self.iou_calculator(bboxes, gt_shadow, mode='iof') >
            self.min_pos_iof)
        # Rule out center effective positive pixels
        is_bbox_in_gt_shadow &= (~is_bbox_in_gt_core)

        num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
        if num_gts == 0 or num_bboxes == 0:
            # If no gts exist, assign all pixels to negative
            assigned_gt_ids = \
                is_bbox_in_gt_core.new_zeros((num_bboxes,),
                                             dtype=torch.long)
            pixels_in_gt_shadow = assigned_gt_ids.new_empty((0, 2))
        else:
            # Step 3: assign a one-hot gt id to each pixel, and smaller objects
            #    have high priority to assign the pixel.
            sort_idx = self.get_gt_priorities(gt_bboxes)
            assigned_gt_ids, pixels_in_gt_shadow = \
                self.assign_one_hot_gt_indices(is_bbox_in_gt_core,
                                               is_bbox_in_gt_shadow,
                                               gt_priority=sort_idx)

        if gt_bboxes_ignore is not None and gt_bboxes_ignore.numel() > 0:
            # No ground truth or boxes, return empty assignment
            gt_bboxes_ignore = scale_boxes(
                gt_bboxes_ignore, scale=self.ignore_gt_scale)
            is_bbox_in_ignored_gts = is_located_in(bbox_centers,
                                                   gt_bboxes_ignore)
            is_bbox_in_ignored_gts = is_bbox_in_ignored_gts.any(dim=1)
            assigned_gt_ids[is_bbox_in_ignored_gts] = -1

        # 4. Assign prior bboxes with class label according to its gt id.
        assigned_labels = None
        shadowed_pixel_labels = None
        if gt_labels is not None:
            # Default assigned label is the background (-1)
            assigned_labels = assigned_gt_ids.new_full((num_bboxes, ), -1)
            pos_inds = torch.nonzero(
                assigned_gt_ids > 0, as_tuple=False).squeeze()
            if pos_inds.numel() > 0:
                assigned_labels[pos_inds] = gt_labels[assigned_gt_ids[pos_inds]
                                                      - 1]
            # 5. Find pixels lying in the shadow of an object
            shadowed_pixel_labels = pixels_in_gt_shadow.clone()
            if pixels_in_gt_shadow.numel() > 0:
                pixel_idx, gt_idx =\
                    pixels_in_gt_shadow[:, 0], pixels_in_gt_shadow[:, 1]
                assert (assigned_gt_ids[pixel_idx] != gt_idx).all(), \
                    'Some pixels are dually assigned to ignore and gt!'
                shadowed_pixel_labels[:, 1] = gt_labels[gt_idx - 1]
                override = (
                    assigned_labels[pixel_idx] == shadowed_pixel_labels[:, 1])
                if self.foreground_dominate:
                    # When a pixel is both positive and shadowed, set it as pos
                    shadowed_pixel_labels = shadowed_pixel_labels[~override]
                else:
                    # When a pixel is both pos and shadowed, set it as shadowed
                    assigned_labels[pixel_idx[override]] = -1
                    assigned_gt_ids[pixel_idx[override]] = 0

        assign_result = AssignResult(
            num_gts, assigned_gt_ids, None, labels=assigned_labels)
        # Add shadowed_labels as assign_result property. Shape: (num_shadow, 2)
        assign_result.set_extra_property('shadowed_labels',
                                         shadowed_pixel_labels)
        return assign_result

    def assign_one_hot_gt_indices(self,
                                  is_bbox_in_gt_core,
                                  is_bbox_in_gt_shadow,
                                  gt_priority=None):
        """Assign only one gt index to each prior box.

        Gts with large gt_priority are more likely to be assigned.

        Args:
            is_bbox_in_gt_core (Tensor): Bool tensor indicating the bbox center
                is in the core area of a gt (e.g. 0-0.2).
                Shape: (num_prior, num_gt).
            is_bbox_in_gt_shadow (Tensor): Bool tensor indicating the bbox
                center is in the shadowed area of a gt (e.g. 0.2-0.5).
                Shape: (num_prior, num_gt).
            gt_priority (Tensor): Priorities of gts. The gt with a higher
                priority is more likely to be assigned to the bbox when the
                bbox matches with multiple gts. Shape: (num_gt, ).

        Returns:
            tuple: Returns (assigned_gt_inds, shadowed_gt_inds).

                - assigned_gt_inds: The assigned gt index of each prior bbox \
                    (i.e. index from 1 to num_gts). Shape: (num_prior, ).
                - shadowed_gt_inds: shadowed gt indices. It is a tensor of \
                    shape (num_ignore, 2) with first column being the \
                    shadowed prior bbox indices and the second column the \
                    shadowed gt indices (1-based).
        """
        num_bboxes, num_gts = is_bbox_in_gt_core.shape

        if gt_priority is None:
            gt_priority = torch.arange(
                num_gts, device=is_bbox_in_gt_core.device)
        assert gt_priority.size(0) == num_gts
        # The bigger gt_priority, the more preferable to be assigned
        # The assigned inds are by default 0 (background)
        assigned_gt_inds = is_bbox_in_gt_core.new_zeros((num_bboxes, ),
                                                        dtype=torch.long)
        # Shadowed bboxes are assigned to be background. But the corresponding
        #   label is ignored during loss calculation, which is done through
        #   shadowed_gt_inds
        shadowed_gt_inds = torch.nonzero(is_bbox_in_gt_shadow, as_tuple=False)
        if is_bbox_in_gt_core.sum() == 0:  # No gt match
            shadowed_gt_inds[:, 1] += 1  # 1-based. For consistency issue
            return assigned_gt_inds, shadowed_gt_inds

        # The priority of each prior box and gt pair. If one prior box is
        #   matched to multiple gts, only the pair with the highest priority
        #   is saved
        pair_priority = is_bbox_in_gt_core.new_full((num_bboxes, num_gts),
                                                    -1,
                                                    dtype=torch.long)

        # Each bbox could match with multiple gts.
        # The following codes deal with this situation
        # Matched bboxes (to any gt). Shape: (num_pos_anchor, )
        inds_of_match = torch.any(is_bbox_in_gt_core, dim=1)
        # The matched gt index of each positive bbox. Length >= num_pos_anchor
        #   , since one bbox could match multiple gts
        matched_bbox_gt_inds = torch.nonzero(
            is_bbox_in_gt_core, as_tuple=False)[:, 1]
        # Assign priority to each bbox-gt pair.
        pair_priority[is_bbox_in_gt_core] = gt_priority[matched_bbox_gt_inds]
        _, argmax_priority = pair_priority[inds_of_match].max(dim=1)
        assigned_gt_inds[inds_of_match] = argmax_priority + 1  # 1-based
        # Zero-out the assigned anchor box to filter the shadowed gt indices
        is_bbox_in_gt_core[inds_of_match, argmax_priority] = 0
        # Concat the shadowed indices due to overlapping with that out side of
        #   effective scale. shape: (total_num_ignore, 2)
        shadowed_gt_inds = torch.cat(
            (shadowed_gt_inds, torch.nonzero(
                is_bbox_in_gt_core, as_tuple=False)),
            dim=0)
        # `is_bbox_in_gt_core` should be changed back to keep arguments intact.
        is_bbox_in_gt_core[inds_of_match, argmax_priority] = 1
        # 1-based shadowed gt indices, to be consistent with `assigned_gt_inds`
        if shadowed_gt_inds.numel() > 0:
            shadowed_gt_inds[:, 1] += 1
        return assigned_gt_inds, shadowed_gt_inds
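Note (not part of the diff): a worked example for scale_boxes, which is how the core (pos_scale) and shadow (neg_scale) regions above are produced; the gt box and scales are illustrative.

import torch
from mmdet.core.bbox.assigners.center_region_assigner import scale_boxes

gt = torch.Tensor([[0., 0., 10., 10.]])
core = scale_boxes(gt, scale=0.2)     # tensor([[4., 4., 6., 6.]])
shadow = scale_boxes(gt, scale=0.5)   # tensor([[2.5, 2.5, 7.5, 7.5]])
# The center (5, 5) is preserved; only the width/height are rescaled.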
mmdet/core/bbox/assigners/grid_assigner.py
ADDED
@@ -0,0 +1,155 @@
import torch

from ..builder import BBOX_ASSIGNERS
from ..iou_calculators import build_iou_calculator
from .assign_result import AssignResult
from .base_assigner import BaseAssigner


@BBOX_ASSIGNERS.register_module()
class GridAssigner(BaseAssigner):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal will be assigned with `-1`, `0`, or a positive integer
    indicating the ground truth index.

    - -1: don't care
    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        pos_iou_thr (float): IoU threshold for positive bboxes.
        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
        min_pos_iou (float): Minimum iou for a bbox to be considered as a
            positive bbox. Positive samples can have smaller IoU than
            pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
        gt_max_assign_all (bool): Whether to assign all bboxes with the same
            highest overlap with some gt to that gt.
    """

    def __init__(self,
                 pos_iou_thr,
                 neg_iou_thr,
                 min_pos_iou=.0,
                 gt_max_assign_all=True,
                 iou_calculator=dict(type='BboxOverlaps2D')):
        self.pos_iou_thr = pos_iou_thr
        self.neg_iou_thr = neg_iou_thr
        self.min_pos_iou = min_pos_iou
        self.gt_max_assign_all = gt_max_assign_all
        self.iou_calculator = build_iou_calculator(iou_calculator)

    def assign(self, bboxes, box_responsible_flags, gt_bboxes, gt_labels=None):
        """Assign gt to bboxes. The process is very much like the max iou
        assigner, except that positive samples are constrained within the cell
        that the gt boxes fell in.

        This method assigns a gt bbox to every bbox (proposal/anchor); each
        bbox will be assigned with -1, 0, or a positive number. -1 means don't
        care, 0 means negative sample, positive number is the index (1-based)
        of assigned gt.
        The assignment is done in following steps, the order matters.

        1. assign every bbox to -1
        2. assign proposals whose iou with all gts <= neg_iou_thr to 0
        3. for each bbox within a cell, if the iou with its nearest gt >
           pos_iou_thr and the center of that gt falls inside the cell,
           assign it to that bbox
        4. for each gt bbox, assign its nearest proposals within the cell the
           gt bbox falls in to itself.

        Args:
            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
            box_responsible_flags (Tensor): flag to indicate whether box is
                responsible for prediction, shape(n, )
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)

        # compute iou between all gt and bboxes
        overlaps = self.iou_calculator(gt_bboxes, bboxes)

        # 1. assign -1 by default
        assigned_gt_inds = overlaps.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)

        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = overlaps.new_zeros((num_bboxes, ))
            if num_gts == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if gt_labels is None:
                assigned_labels = None
            else:
                assigned_labels = overlaps.new_full((num_bboxes, ),
                                                    -1,
                                                    dtype=torch.long)
            return AssignResult(
                num_gts,
                assigned_gt_inds,
                max_overlaps,
                labels=assigned_labels)

        # 2. assign negative: below
        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
        # shape of max_overlaps == argmax_overlaps == num_bboxes
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)

        if isinstance(self.neg_iou_thr, float):
            assigned_gt_inds[(max_overlaps >= 0)
                             & (max_overlaps <= self.neg_iou_thr)] = 0
        elif isinstance(self.neg_iou_thr, (tuple, list)):
            assert len(self.neg_iou_thr) == 2
            assigned_gt_inds[(max_overlaps > self.neg_iou_thr[0])
                             & (max_overlaps <= self.neg_iou_thr[1])] = 0

        # 3. assign positive: falls into responsible cell and above
        # positive IOU threshold, the order matters.
        # the prior condition of comparison is to filter out all
        # unrelated anchors, i.e. not box_responsible_flags
        overlaps[:, ~box_responsible_flags.type(torch.bool)] = -1.

        # calculate max_overlaps again, but this time we only consider IOUs
        # for anchors responsible for prediction
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)

        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
        # shape of gt_max_overlaps == gt_argmax_overlaps == num_gts
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

        pos_inds = (max_overlaps >
                    self.pos_iou_thr) & box_responsible_flags.type(torch.bool)
        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1

        # 4. assign positive to max overlapped anchors within responsible cell
        for i in range(num_gts):
            if gt_max_overlaps[i] > self.min_pos_iou:
                if self.gt_max_assign_all:
                    max_iou_inds = (overlaps[i, :] == gt_max_overlaps[i]) & \
                        box_responsible_flags.type(torch.bool)
                    assigned_gt_inds[max_iou_inds] = i + 1
                elif box_responsible_flags[gt_argmax_overlaps[i]]:
                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1

        # assign labels of positive anchors
        if gt_labels is not None:
            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)
            pos_inds = torch.nonzero(
                assigned_gt_inds > 0, as_tuple=False).squeeze()
            if pos_inds.numel() > 0:
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]

        else:
            assigned_labels = None

        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
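Note (not part of the diff): a minimal sketch of GridAssigner on toy data, showing that only boxes whose responsible flag is set can become positive; the boxes and thresholds are illustrative.

import torch
from mmdet.core.bbox.assigners import GridAssigner

assigner = GridAssigner(pos_iou_thr=0.5, neg_iou_thr=0.5)
bboxes = torch.Tensor([[0., 0., 10., 10.], [20., 20., 30., 30.]])
flags = torch.tensor([1, 0], dtype=torch.uint8)   # only the first box is responsible
gt_bboxes = torch.Tensor([[0., 0., 10., 10.]])
result = assigner.assign(bboxes, flags, gt_bboxes)
# result.gt_inds -> tensor([1, 0]): the responsible box is assigned to gt 1,
# the far-away box falls below neg_iou_thr and is marked negative.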